tatami_chunked
Helpers to create custom chunked tatami matrices
OracularSlabCache.hpp
#ifndef TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP
#define TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP

#include <unordered_map>
#include <vector>
#include <list>
#include <type_traits>
#include <memory>

#include "tatami/tatami.hpp"

namespace tatami_chunked {

/**
 * Oracular-aware cache for slabs.
 *
 * @tparam Id_ Type of the slab identifier.
 * @tparam Index_ Integer type of the predicted element index.
 * @tparam Slab_ Class of the slab.
 * @tparam track_reuse_ Whether to track slabs that are re-used across populate cycles.
 */
template<typename Id_, typename Index_, class Slab_, bool track_reuse_ = false>
class OracularSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    size_t my_total;
    size_t my_counter = 0;

    Index_ my_last_slab_id = 0;
    Slab_* my_last_slab = NULL;

    size_t my_max_slabs;
    std::vector<Slab_> my_all_slabs;
    std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;
    std::vector<std::pair<Id_, Slab_*> > my_to_populate;
    std::vector<Id_> my_in_need;
    size_t my_refresh_point = 0;

    typename std::conditional<track_reuse_, std::vector<std::pair<Id_, Slab_*> >, bool>::type my_to_reuse;

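    // Design note: my_all_slabs owns all slab instances, while the maps and
    // my_last_slab only hold pointers into it. On each refresh cycle, slabs
    // that will be needed again are transferred from my_current_cache to
    // my_future_cache; any remaining my_current_cache entries are recycled
    // for the new slab identifiers collected in my_in_need.
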
public:
    /**
     * @param oracle Oracle specifying the sequence of future access requests.
     * @param max_slabs Maximum number of slabs to hold in the cache.
     */
    OracularSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, size_t max_slabs) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_slabs(max_slabs)
    {
        my_all_slabs.reserve(max_slabs);
        my_current_cache.reserve(max_slabs);
        my_future_cache.reserve(max_slabs);
    }

    // Copy construction/assignment is deleted, as the cache holds pointers
    // into my_all_slabs that a copy would not preserve.
    OracularSlabCache(const OracularSlabCache&) = delete;
    OracularSlabCache& operator=(const OracularSlabCache&) = delete;

    // Move operators are still okay as pointers still point into the moved vectors,
    // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
    OracularSlabCache(OracularSlabCache&&) = default;
    OracularSlabCache& operator=(OracularSlabCache&&) = default;

    // Might as well define this.
    ~OracularSlabCache() = default;
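
    // For illustration (MySlab being any slab type), moving is safe:
    //     OracularSlabCache<int, int, MySlab> cache(oracle, 10);
    //     auto moved = std::move(cache); // pointers inside 'moved' remain valid.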

public:
    /**
     * @return The index of the next element to be accessed, as predicted by the oracle.
     */
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
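    /**
     * Fetch the next slab, creating and populating it if it is not already cached.
     *
     * @param identify Function that accepts an element index and returns a pair
     * containing (i) the identifier of the slab containing that element and
     * (ii) the index of the element within that slab.
     * @param create Function that returns a newly constructed Slab_.
     * @param populate Function that accepts a vector of (slab identifier,
     * slab pointer) pairs and fills each slab with its contents; if
     * track_reuse_ = true, it also accepts a second vector of pairs for slabs
     * that were re-used from the previous cycle.
     *
     * @return Pair containing a pointer to the slab for the next predicted
     * element, and the index of that element within the slab.
     */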
    template<class Ifunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);
        if (slab_info.first == my_last_slab_id && my_last_slab) {
            return std::make_pair(my_last_slab, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Update the cache if we hit the refresh point.
        if (my_counter - 1 == my_refresh_point) {
            // Note that, for any given populate cycle, the first prediction's
            // slab cannot already be in the cache, otherwise it would have
            // been incorporated into the previous cycle. So we can skip some code.
            my_future_cache[slab_info.first] = NULL;
            my_in_need.push_back(slab_info.first);
            size_t used_slabs = 1;
            auto last_future_slab_id = slab_info.first;

            while (++my_refresh_point < my_total) {
                auto future_index = my_oracle->get(my_refresh_point);
                auto future_slab_info = identify(future_index);
                if (future_slab_info.first == last_future_slab_id) {
                    continue;
                }

                last_future_slab_id = future_slab_info.first;
                if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
                    continue;
                }

                if (used_slabs == my_max_slabs) {
                    break;
                }
                ++used_slabs;

                auto ccIt = my_current_cache.find(future_slab_info.first);
                if (ccIt == my_current_cache.end()) {
                    my_future_cache[future_slab_info.first] = NULL;
                    my_in_need.push_back(future_slab_info.first);

                } else {
                    auto slab_ptr = ccIt->second;
                    my_future_cache[future_slab_info.first] = slab_ptr;
                    my_current_cache.erase(ccIt);
                    if constexpr(track_reuse_) {
                        my_to_reuse.emplace_back(future_slab_info.first, slab_ptr);
                    }
                }
            }

            auto cIt = my_current_cache.begin();
            for (auto a : my_in_need) {
                if (cIt != my_current_cache.end()) {
                    my_to_populate.emplace_back(a, cIt->second);
                    my_future_cache[a] = cIt->second;
                    ++cIt;
                } else {
                    // We reserved my_all_slabs, so further push_back() calls should
                    // not trigger any reallocation or invalidation of the pointers.
                    my_all_slabs.push_back(create());
                    auto slab_ptr = &(my_all_slabs.back());
                    my_to_populate.emplace_back(a, slab_ptr);
                    my_future_cache[a] = slab_ptr;
                }
            }
            my_in_need.clear();

            if constexpr(track_reuse_) {
                populate(my_to_populate, my_to_reuse);
            } else {
                populate(my_to_populate);
            }

            my_to_populate.clear();
            if constexpr(track_reuse_) {
                my_to_reuse.clear();
            }

            // We always fill my_future_cache to the brim, so every entry of
            // my_all_slabs should be referenced by a pointer in my_future_cache.
            // There shouldn't be any free cache entries remaining in
            // my_current_cache, i.e., at this point, cIt should equal
            // my_current_cache.end(), as we transferred everything to
            // my_future_cache. Thus it is safe to clear my_current_cache
            // without worrying about leaking memory. The only exception is if
            // we run out of predictions, in which case it doesn't matter.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab = ccIt->second;
        return std::make_pair(my_last_slab, slab_info.second);
    }

public:
    /**
     * @return Maximum number of slabs in the cache.
     */
    size_t get_max_slabs() const {
        return my_max_slabs;
    }

    /**
     * @return Number of slabs currently in the cache.
     */
    size_t get_num_slabs() const {
        return my_current_cache.size();
    }
};

}

#endif
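
For reference, a minimal usage sketch is shown below. The MySlab payload type, the load_slab() helper, the slab_extent chunking and the include path are all hypothetical stand-ins for an application's own chunked storage; the oracle is built from tatami::FixedVectorOracle, assuming the usual tatami definition that wraps a fixed vector of predicted indices.

// Minimal usage sketch; MySlab, load_slab() and slab_extent are hypothetical.
#include "tatami_chunked/OracularSlabCache.hpp"
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct MySlab {
    std::vector<double> values;
};

// Hypothetical loader that fills a slab with the contents of chunk 'id'.
void load_slab(int id, MySlab& slab, int slab_extent) {
    slab.values.assign(slab_extent, static_cast<double>(id));
}

int main() {
    const int slab_extent = 100; // each slab covers 100 consecutive elements.

    // Fixed sequence of predicted element accesses.
    std::vector<int> predictions { 0, 1, 2, 250, 251, 42, 43, 999 };
    auto oracle = std::make_shared<tatami::FixedVectorOracle<int> >(std::move(predictions));
    size_t total = oracle->total();

    tatami_chunked::OracularSlabCache<int, int, MySlab> cache(std::move(oracle), /* max_slabs = */ 2);

    for (size_t i = 0; i < total; ++i) {
        auto res = cache.next(
            /* identify = */ [&](int x) { return std::make_pair(x / slab_extent, x % slab_extent); },
            /* create = */ [&]() { return MySlab(); },
            /* populate = */ [&](std::vector<std::pair<int, MySlab*> >& to_populate) {
                for (auto& p : to_populate) {
                    load_slab(p.first, *p.second, slab_extent);
                }
            }
        );
        std::cout << res.first->values[res.second] << "\n"; // value for the predicted element.
    }

    return 0;
}

Note that populate() receives only the slabs that actually need (re)loading in each cycle; slabs carried over from the previous cycle are reused without any further I/O.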