tatami_chunked/OracularVariableSlabCache_8hpp_source.html

#ifndef TATAMI_CHUNKED_ORACULAR_VARIABLE_SLAB_CACHE_HPP

#define TATAMI_CHUNKED_ORACULAR_VARIABLE_SLAB_CACHE_HPP


#include <unordered_map>

#include <vector>

#include <list>

#include <type_traits>

#include <optional>

#include <cstddef>


#include "tatami/tatami.hpp"


namespace tatami_chunked {


/**
 * @brief Oracle-aware cache for variable-size slabs.
 *
 * The cache walks through the predictions of a `tatami::Oracle` and, at each "refresh point",
 * looks ahead to decide which slabs to keep, reuse or (re)populate so that the sum of the
 * reported slab sizes stays within the maximum size supplied at construction.
 * Slabs are stored in an internal vector and are referred to by their offset into that vector.
 *
 * @tparam Id_ Type of the slab identifier. Used as the key of `std::unordered_map`.
 * @tparam Index_ Type of the values produced by the oracle.
 * @tparam Slab_ Type of a slab, as returned by the `create()` callback of `next()`.
 * @tparam Size_ Type used to accumulate and compare slab sizes.
 */
template<typename Id_, typename Index_, class Slab_, typename Size_>
class OracularVariableSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    tatami::PredictionIndex my_total; // total number of predictions, as reported by the oracle.
    tatami::PredictionIndex my_counter = 0; // position of the next prediction to be consumed.

    using SlabPool = std::vector<Slab_>;
    using SlabIndex = typename SlabPool::size_type;

    // Identifier and offset of the slab returned by the most recent slab-level next() call.
    // The identifier is only meaningful once 'my_last_slab_num' holds a value, so its
    // initial value is never inspected. It is stored as an 'Id_' (not an 'Index_') so that
    // it compares exactly against the identifiers returned by identify().
    Id_ my_last_slab_id{};
    std::optional<SlabIndex> my_last_slab_num;

    Size_ my_max_size, my_used_size = 0;
    SlabPool my_all_slabs;

    // We need to hold an offset into 'my_all_slabs' rather than a pointer, as
    // 'my_all_slabs' might be reallocated upon addition of new slabs, given that
    // we don't know the maximum number of slabs ahead of time.
    std::unordered_map<Id_, SlabIndex> my_current_cache, my_future_cache;
    std::vector<std::pair<Id_, SlabIndex> > my_to_populate, my_to_reuse;
    std::vector<Id_> my_in_need; // slabs that were requested while the free pool was empty.
    std::vector<SlabIndex> my_free_pool; // offsets of allocated slabs that are not currently assigned.
    tatami::PredictionIndex my_refresh_point = 0; // prediction at which the next refresh is triggered.

public:
    /**
     * @param oracle Pointer to an oracle; `total()` is called on it immediately, so it must not be null.
     * @param max_size Upper limit on the accumulated slab size that the look-ahead will accept.
     * The first slab of each refresh cycle is always accepted, regardless of this limit.
     */
    OracularVariableSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, std::size_t max_size) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_size(max_size)
    {}

    /**
     * Deleted as the cache is not meant to be copied.
     */
    OracularVariableSlabCache(const OracularVariableSlabCache&) = delete;

    /**
     * Deleted as the cache is not meant to be copied.
     */
    OracularVariableSlabCache& operator=(const OracularVariableSlabCache&) = delete;

    // Moves are fine: the bookkeeping stores offsets into 'my_all_slabs' rather than
    // pointers, so it remains consistent after the vector has been moved.
    OracularVariableSlabCache& operator=(OracularVariableSlabCache&&) = default;
    OracularVariableSlabCache(OracularVariableSlabCache&&) = default;

    // Might as well define this.
    ~OracularVariableSlabCache() = default;

public:
    /**
     * Fetch the next prediction from the oracle and advance the internal counter.
     *
     * @return The oracle's prediction at the current position.
     */
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
    /**
     * Fetch the slab for the next prediction, refreshing the cache if the refresh point was reached.
     *
     * @tparam Ifunction_ Callable as `identify(Index_)`.
     * @tparam Ufunction_ Callable as `upper_size(id)`.
     * @tparam Afunction_ Callable as `actual_size(id, const Slab_&)`.
     * @tparam Cfunction_ Callable as `create()`.
     * @tparam Pfunction_ Callable as `populate(to_populate, to_reuse, all_slabs)`.
     *
     * @param identify Called with a prediction; returns a pair-like object where `first` is the
     * slab identifier and `second` is passed through unchanged as the second element of the return value
     * (presumably the position of the prediction inside the slab).
     * @param upper_size Called with a slab identifier for a slab that is not in the current cache;
     * its result is added to the used size when deciding whether the slab fits.
     * @param actual_size Called with a slab identifier and the cached slab for a slab that is already
     * in the current cache; its result is added to the used size when deciding whether the slab can be kept.
     * @param create Called without arguments to produce a new `Slab_` when no existing slab can be recycled.
     * @param populate Called once per refresh with (i) the identifier/offset pairs of the slabs to be filled,
     * (ii) the identifier/offset pairs of the slabs carried over from the previous cycle,
     * and (iii) the vector of all slabs, into which the offsets index.
     *
     * @return Pair containing a pointer to the slab for the current prediction and `identify(...).second`.
     * The pointer refers to an element of an internal vector that may grow during a later call,
     * so it should not be assumed to remain valid across calls to this method.
     */
    template<class Ifunction_, class Ufunction_, class Afunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Ufunction_ upper_size, Afunction_ actual_size, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);

        // Shortcut: consecutive predictions in the same slab skip the hash lookup.
        if (my_last_slab_num.has_value() && slab_info.first == my_last_slab_id) {
            return std::make_pair(my_all_slabs.data() + *my_last_slab_num, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Updating the cache if we hit the refresh point.
        // ('my_counter' was already incremented by the no-argument next() above.)
        if (my_counter - 1 == my_refresh_point) {
            // Note that, for any given populate cycle, the first prediction's
            // slab cannot already be in the cache, otherwise it would have been
            // incorporated into the previous cycle. So we can skip some code.
            my_used_size = upper_size(slab_info.first);
            requisition_new_slab(slab_info.first);

            // Look ahead until we run out of predictions or the next slab would not fit.
            auto last_future_slab_id = slab_info.first;
            while (++my_refresh_point < my_total) {
                auto future_index = my_oracle->get(my_refresh_point);
                auto future_slab_info = identify(future_index);
                if (last_future_slab_id == future_slab_info.first) {
                    continue; // same slab as the previous prediction, nothing new to do.
                }

                last_future_slab_id = future_slab_info.first;
                if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
                    continue; // already scheduled in this cycle.
                }

                auto ccIt = my_current_cache.find(future_slab_info.first);
                if (ccIt != my_current_cache.end()) {
                    // The slab is already loaded, so it is carried over and its actual size is used.
                    auto slab_num = ccIt->second;
                    auto candidate = my_used_size + actual_size(future_slab_info.first, my_all_slabs[slab_num]);
                    if (candidate > my_max_size) {
                        break;
                    }
                    my_used_size = candidate;
                    my_future_cache[future_slab_info.first] = slab_num;
                    my_to_reuse.emplace_back(future_slab_info.first, slab_num);
                    my_current_cache.erase(ccIt);
                } else {
                    // The slab is not loaded, so only the upper bound on its size is available.
                    auto candidate = my_used_size + upper_size(future_slab_info.first);
                    if (candidate > my_max_size) {
                        break;
                    }
                    my_used_size = candidate;
                    requisition_new_slab(future_slab_info.first);
                }
            }

            // Whatever is left in 'my_current_cache' was not carried over, so its slabs can be
            // recycled for the identifiers that could not be served from the free pool.
            // New slabs are only created once those leftovers are exhausted.
            auto cIt = my_current_cache.begin();
            for (const auto& needed_id : my_in_need) {
                SlabIndex slab_num;
                if (cIt != my_current_cache.end()) {
                    slab_num = cIt->second;
                    ++cIt;
                } else {
                    slab_num = my_all_slabs.size();
                    my_all_slabs.push_back(create());
                }
                my_to_populate.emplace_back(needed_id, slab_num);
                my_future_cache[needed_id] = slab_num; // overwrites the placeholder set by requisition_new_slab().
            }
            my_in_need.clear();

            // Any leftovers that were not recycled go into the free pool for later cycles.
            for (; cIt != my_current_cache.end(); ++cIt) {
                my_free_pool.emplace_back(cIt->second);
            }

            populate(my_to_populate, my_to_reuse, my_all_slabs);
            my_to_populate.clear();
            my_to_reuse.clear();

            // The future cache becomes the current cache; the future cache is left empty.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab_num = ccIt->second;
        return std::make_pair(my_all_slabs.data() + *my_last_slab_num, slab_info.second);
    }

private:
    // Registers 'slab_id' in the future cache. If a free slab is available, it is assigned
    // immediately and scheduled for population; otherwise a placeholder offset of 0 is stored
    // and the identifier is queued in 'my_in_need', to be resolved later in the refresh.
    void requisition_new_slab(Id_ slab_id) {
        if (!my_free_pool.empty()) {
            auto slab_num = my_free_pool.back();
            my_future_cache[slab_id] = slab_num;
            my_free_pool.pop_back();
            my_to_populate.emplace_back(slab_id, slab_num);
        } else {
            my_future_cache[slab_id] = 0;
            my_in_need.push_back(slab_id);
        }
    }

public:
    /**
     * @return The maximum size supplied at construction, converted to `Size_`.
     */
    auto get_max_size() const {
        return my_max_size;
    }

    /**
     * @return The size accumulated during the most recent refresh, i.e., the sum of the
     * `upper_size()` results for newly scheduled slabs and the `actual_size()` results for reused slabs.
     */
    auto get_used_size() const {
        return my_used_size;
    }

    /**
     * @return Number of slabs in the current cache.
     */
    auto get_num_slabs() const {
        return my_current_cache.size();
    }
};


}


#endif

tatami::Oracle

tatami_chunked::OracularVariableSlabCache
Oracle-aware cache for variable-size slabs.
Definition OracularVariableSlabCache.hpp:46

tatami_chunked::OracularVariableSlabCache::get_used_size
auto get_used_size() const
Definition OracularVariableSlabCache.hpp:275

tatami_chunked::OracularVariableSlabCache::get_num_slabs
auto get_num_slabs() const
Definition OracularVariableSlabCache.hpp:283

tatami_chunked::OracularVariableSlabCache::get_max_size
auto get_max_size() const
Definition OracularVariableSlabCache.hpp:266

tatami_chunked::OracularVariableSlabCache::next
std::pair< const Slab_ *, Index_ > next(Ifunction_ identify, Ufunction_ upper_size, Afunction_ actual_size, Cfunction_ create, Pfunction_ populate)
Definition OracularVariableSlabCache.hpp:163

tatami_chunked::OracularVariableSlabCache::OracularVariableSlabCache
OracularVariableSlabCache(const OracularVariableSlabCache &)=delete

tatami_chunked::OracularVariableSlabCache::operator=
OracularVariableSlabCache & operator=(const OracularVariableSlabCache &)=delete

tatami_chunked::OracularVariableSlabCache::OracularVariableSlabCache
OracularVariableSlabCache(std::shared_ptr< const tatami::Oracle< Index_ > > oracle, std::size_t max_size)
Definition OracularVariableSlabCache.hpp:76

tatami_chunked::OracularVariableSlabCache::next
Index_ next()
Definition OracularVariableSlabCache.hpp:113

tatami_chunked
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:11

tatami::PredictionIndex
std::size_t PredictionIndex

tatami.hpp