tatami_chunked
Helpers to create custom chunked tatami matrices
Loading...
Searching...
No Matches
OracularVariableSlabCache.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CHUNKED_ORACULAR_VARIABLE_SLAB_CACHE_HPP
2#define TATAMI_CHUNKED_ORACULAR_VARIABLE_SLAB_CACHE_HPP
3
4#include <unordered_map>
5#include <vector>
6#include <list>
7#include <type_traits>
8#include <optional>
9#include <cstddef>
10
11#include "tatami/tatami.hpp"
12
18namespace tatami_chunked {
19
45template<typename Id_, typename Index_, class Slab_, typename Size_>
47private:
48 std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
50 tatami::PredictionIndex my_counter = 0;
51
52 typedef std::vector<Slab_> SlabPool;
53 typedef typename SlabPool::size_type SlabIndex;
54
55 Index_ my_last_slab_id = 0;
56 std::optional<SlabIndex> my_last_slab_num;
57
58 Size_ my_max_size, my_used_size = 0;
59 std::vector<Slab_> my_all_slabs;
60
61 // We need to hold an offset into 'my_all_slabs' rather than a pointer, as
62 // 'my_all_slabs' might be reallocated upon addition of new slabs, given that
63 // we don't know the maximum number of slabs ahead of time.
64 std::unordered_map<Id_, SlabIndex> my_current_cache, my_future_cache;
65 std::vector<std::pair<Id_, SlabIndex> > my_to_populate, my_to_reuse;
66 std::vector<Id_> my_in_need;
67 std::vector<SlabIndex> my_free_pool;
68 tatami::PredictionIndex my_refresh_point = 0;
69
70public:
76 OracularVariableSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, std::size_t max_size) :
77 my_oracle(std::move(oracle)),
78 my_total(my_oracle->total()),
79 my_max_size(max_size)
80 {}
81
86
91
95 // Move operators are still okay as pointers still point to the moved vectors.
96 // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
99
100 // Might as well define this.
101 ~OracularVariableSlabCache() = default;
106public:
113 Index_ next() {
114 return my_oracle->get(my_counter++);
115 }
116
117public:
162 template<class Ifunction_, class Ufunction_, class Afunction_, class Cfunction_, class Pfunction_>
163 std::pair<const Slab_*, Index_> next(Ifunction_ identify, Ufunction_ upper_size, Afunction_ actual_size, Cfunction_ create, Pfunction_ populate) {
164 Index_ index = this->next();
165 auto slab_info = identify(index);
166 if (slab_info.first == my_last_slab_id && my_last_slab_num.has_value()) {
167 return std::make_pair(my_all_slabs.data() + *my_last_slab_num, slab_info.second);
168 }
169 my_last_slab_id = slab_info.first;
170
171 // Updating the cache if we hit the refresh point.
172 if (my_counter - 1 == my_refresh_point) {
173 // Note that, for any given populate cycle, the first prediction's
174 // slab cannot already be in the cache, otherwise it would have
175 // incorporated into the previous cycle. So we can skip some code.
176 my_used_size = upper_size(slab_info.first);
177 requisition_new_slab(slab_info.first);
178
179 auto last_future_slab_id = slab_info.first;
180 while (++my_refresh_point < my_total) {
181 auto future_index = my_oracle->get(my_refresh_point);
182 auto future_slab_info = identify(future_index);
183 if (last_future_slab_id == future_slab_info.first) {
184 continue;
185 }
186
187 last_future_slab_id = future_slab_info.first;
188 if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
189 continue;
190 }
191
192 auto ccIt = my_current_cache.find(future_slab_info.first);
193 if (ccIt != my_current_cache.end()) {
194 auto slab_num = ccIt->second;
195 auto candidate = my_used_size + actual_size(future_slab_info.first, my_all_slabs[slab_num]);
196 if (candidate > my_max_size) {
197 break;
198 }
199 my_used_size = candidate;
200 my_future_cache[future_slab_info.first] = slab_num;
201 my_to_reuse.emplace_back(future_slab_info.first, slab_num);
202 my_current_cache.erase(ccIt);
203 } else {
204 auto candidate = my_used_size + upper_size(future_slab_info.first);
205 if (candidate > my_max_size) {
206 break;
207 }
208 my_used_size = candidate;
209 requisition_new_slab(future_slab_info.first);
210 }
211 }
212
213 auto cIt = my_current_cache.begin();
214 for (auto a : my_in_need) {
215 if (cIt != my_current_cache.end()) {
216 auto slab_num = cIt->second;
217 my_to_populate.emplace_back(a, slab_num);
218 my_future_cache[a] = slab_num;
219 ++cIt;
220 } else {
221 auto slab_num = my_all_slabs.size();
222 my_all_slabs.push_back(create());
223 my_to_populate.emplace_back(a, slab_num);
224 my_future_cache[a] = slab_num;
225 }
226 }
227 my_in_need.clear();
228
229 for (; cIt != my_current_cache.end(); ++cIt) {
230 my_free_pool.emplace_back(cIt->second);
231 }
232
233 populate(my_to_populate, my_to_reuse, my_all_slabs);
234 my_to_populate.clear();
235 my_to_reuse.clear();
236
237 my_current_cache.clear();
238 my_current_cache.swap(my_future_cache);
239 }
240
241 // We know it must exist, so no need to check ccIt's validity.
242 auto ccIt = my_current_cache.find(slab_info.first);
243 my_last_slab_num = ccIt->second;
244 return std::make_pair(my_all_slabs.data() + *my_last_slab_num, slab_info.second);
245 }
246
247private:
248 void requisition_new_slab(Id_ slab_id) {
249 if (!my_free_pool.empty()) {
250 auto slab_num = my_free_pool.back();
251 my_future_cache[slab_id] = slab_num;
252 my_free_pool.pop_back();
253 my_to_populate.emplace_back(slab_id, slab_num);
254 } else {
255 my_future_cache[slab_id] = 0;
256 my_in_need.push_back(slab_id);
257 }
258 }
259
260public:
266 auto get_max_size() const {
267 return my_max_size;
268 }
269
275 auto get_used_size() const {
276 return my_used_size;
277 }
278
283 auto get_num_slabs() const {
284 return my_current_cache.size();
285 }
286};
287
288}
289
290#endif
Oracle-aware cache for variable-size slabs.
Definition OracularVariableSlabCache.hpp:46
auto get_used_size() const
Definition OracularVariableSlabCache.hpp:275
auto get_num_slabs() const
Definition OracularVariableSlabCache.hpp:283
auto get_max_size() const
Definition OracularVariableSlabCache.hpp:266
std::pair< const Slab_ *, Index_ > next(Ifunction_ identify, Ufunction_ upper_size, Afunction_ actual_size, Cfunction_ create, Pfunction_ populate)
Definition OracularVariableSlabCache.hpp:163
OracularVariableSlabCache(const OracularVariableSlabCache &)=delete
OracularVariableSlabCache & operator=(const OracularVariableSlabCache &)=delete
OracularVariableSlabCache(std::shared_ptr< const tatami::Oracle< Index_ > > oracle, std::size_t max_size)
Definition OracularVariableSlabCache.hpp:76
Index_ next()
Definition OracularVariableSlabCache.hpp:113
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:4
std::size_t PredictionIndex