tatami_chunked
Helpers to create custom chunked tatami matrices
OracularSubsettedSlabCache.hpp

#ifndef TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP
#define TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP

#include <unordered_map>
#include <vector>
#include <list>
#include <cstddef>
#include <algorithm> // std::sort, std::is_sorted
#include <numeric> // std::iota
#include <memory> // std::shared_ptr
#include <tuple> // std::tuple
#include <utility> // std::pair, std::move

#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

namespace tatami_chunked {

// Type of subset selection to extract from a cached slab: the entire slab (FULL),
// a contiguous block (BLOCK), or a sorted set of unique indices (INDEX).
enum class OracularSubsettedSlabCacheSelectionType : char { FULL, BLOCK, INDEX };

// Details on the subset to extract in OracularSubsettedSlabCache.
template<typename Index_>
struct OracularSubsettedSlabCacheSelectionDetails {
    // Type of selection to extract from this slab.
    OracularSubsettedSlabCacheSelectionType selection;

    // Start of the block to extract; only meaningful if selection == BLOCK.
    Index_ block_start;

    // Length of the block to extract (block_end - block_start); only meaningful if selection == BLOCK.
    Index_ block_length;

    // One-past-the-end of the block to extract; only meaningful if selection == BLOCK.
    Index_ block_end;

    // Sorted and unique indices to extract; only meaningful if selection == INDEX.
    std::vector<Index_> indices;

    // Mapping of each entry of 'indices' to its position within 'indices'; only meaningful if selection == INDEX.
    std::unordered_map<Index_, Index_> mapping;
};
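
// A minimal sketch of how a caller's populate() function might interpret these
// details; extract_full(), extract_block() and extract_index() are hypothetical
// helpers for the underlying chunked store, not part of tatami_chunked.
//
//     template<typename Index_, class Slab_>
//     void apply_selection(const OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Slab_& slab) {
//         switch (details.selection) {
//             case OracularSubsettedSlabCacheSelectionType::FULL:
//                 extract_full(slab); // everything in the slab is needed.
//                 break;
//             case OracularSubsettedSlabCacheSelectionType::BLOCK:
//                 extract_block(slab, details.block_start, details.block_length); // contiguous block.
//                 break;
//             case OracularSubsettedSlabCacheSelectionType::INDEX:
//                 extract_index(slab, details.indices); // sorted, unique indices.
//                 break;
//         }
//     }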

namespace OracularSubsettedSlabCache_internals {

// We put these functions in here as struct{} usage policy forbids methods
// (outside of the constructor). The Details class should be a passive data
// carrier only.

template<typename Index_>
void fill_mapping_in_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details) {
    auto num = details.indices.size();
    for (decltype(num) i = 0; i < num; ++i) {
        details.mapping[details.indices[i]] = i;
    }
}

template<typename Index_>
void set_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i) {
    details.selection = OracularSubsettedSlabCacheSelectionType::BLOCK;
    details.block_start = i;
    details.block_end = i + 1;
    details.indices.clear();
    details.mapping.clear();
}

template<typename Index_>
void add_to_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i) {
    if (details.selection == OracularSubsettedSlabCacheSelectionType::FULL) {
        return;
    }

    if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
        if (i == details.block_end) {
            details.block_end = i + 1;
            return;

        } else if (i + 1 == details.block_start) {
            details.block_start = i;
            return;

        } else if (i >= details.block_start && i < details.block_end) {
            return;
        }

        details.selection = OracularSubsettedSlabCacheSelectionType::INDEX;
        tatami::resize_container_to_Index_size(details.indices, details.block_end - details.block_start);
        std::iota(details.indices.begin(), details.indices.end(), details.block_start);
        fill_mapping_in_details(details);
    }

    if (details.mapping.find(i) == details.mapping.end()) {
        details.mapping[i] = details.indices.size();
        details.indices.push_back(i);
    }
}

template<typename Index_>
void finalize_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details) {
    if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
        details.block_length = details.block_end - details.block_start;
    } else if (details.selection == OracularSubsettedSlabCacheSelectionType::INDEX) {
        if (!std::is_sorted(details.indices.begin(), details.indices.end())) {
            std::sort(details.indices.begin(), details.indices.end());
            fill_mapping_in_details(details);
        }
    }
}

}
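
// A small worked example of the accumulation logic above, using a hypothetical
// OracularSubsettedSlabCacheSelectionDetails<int> d:
//
//     OracularSubsettedSlabCache_internals::set_details(d, 5);    // BLOCK covering [5, 6)
//     OracularSubsettedSlabCache_internals::add_to_details(d, 6); // extends the block to [5, 7)
//     OracularSubsettedSlabCache_internals::add_to_details(d, 4); // extends the block to [4, 7)
//     OracularSubsettedSlabCache_internals::add_to_details(d, 9); // non-adjacent, so the selection becomes INDEX with indices {4, 5, 6, 9}
//     OracularSubsettedSlabCache_internals::finalize_details(d);  // sorts 'indices' for INDEX selections, or sets 'block_length' for BLOCK selections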

// Oracle-aware cache for slabs, plus subsets. The oracle's predictions are used to
// decide which slabs to retain and which subset of each slab (full, block or index)
// actually needs to be extracted when the slab is populated.
template<typename Id_, typename Index_, class Slab_>
class OracularSubsettedSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    tatami::PredictionIndex my_total;
    tatami::PredictionIndex my_counter = 0;

    Index_ my_last_slab_id = 0;
    Slab_* my_last_slab = NULL;

    typedef std::vector<Slab_> SlabPool;
    typename SlabPool::size_type my_max_slabs;
    SlabPool my_all_slabs;
    std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;

    std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_> > my_all_subset_details;
    std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_free_subset_details;
    std::unordered_map<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_close_future_subset_cache, my_far_future_subset_cache;

    tatami::PredictionIndex my_close_refresh_point = 0;
    tatami::PredictionIndex my_far_refresh_point = 0;
    Id_ my_far_slab_id;
    Index_ my_far_slab_offset;

    std::vector<std::pair<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_reassign;
    std::vector<std::tuple<Id_, Slab_*, const OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_populate;

public:
    // 'oracle' provides the predictions of future element accesses, while
    // 'max_slabs' caps the number of slabs retained in the cache.
    template<typename MaxSlabs_>
    OracularSubsettedSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, MaxSlabs_ max_slabs) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_slabs(sanisizer::cast<decltype(my_max_slabs)>(max_slabs))
    {
        my_all_slabs.reserve(max_slabs);
        my_current_cache.reserve(max_slabs);
        my_future_cache.reserve(max_slabs);
        my_close_future_subset_cache.reserve(max_slabs);
        my_far_future_subset_cache.reserve(max_slabs);

        my_all_subset_details.resize(sanisizer::product<decltype(my_all_subset_details.size())>(2, max_slabs));
        for (auto& as : my_all_subset_details) {
            my_free_subset_details.push_back(&as);
        }
    }

    // Copying is not allowed, as the cache stores pointers into its own containers
    // (e.g., my_all_slabs, my_all_subset_details) that a copy would not remap.
    OracularSubsettedSlabCache(const OracularSubsettedSlabCache&) = delete;
    OracularSubsettedSlabCache& operator=(const OracularSubsettedSlabCache&) = delete;

    // Move operators are still okay as pointers still point to the moved vectors,
    // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
    OracularSubsettedSlabCache(OracularSubsettedSlabCache&&) = default;
    OracularSubsettedSlabCache& operator=(OracularSubsettedSlabCache&&) = default;

    // Might as well define this.
    ~OracularSubsettedSlabCache() = default;

public:
    // Return the next index to be accessed, as predicted by the oracle, and advance
    // the internal position in the prediction stream.
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
    // Fetch the next slab predicted by the oracle, creating and populating it if it
    // is not already cached.
    //
    // - 'identify' maps a predicted index to a pair containing the identifier of its
    //   slab and its offset within that slab.
    // - 'create' returns a newly constructed Slab_.
    // - 'populate' receives a vector of (slab identifier, slab pointer, selection
    //   details pointer) tuples and fills each slab with the requested subset.
    //
    // Returns a pair containing a pointer to the cached slab and the offset of the
    // predicted index within that slab.
    template<class Ifunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);
        if (slab_info.first == my_last_slab_id && my_last_slab) {
            return std::make_pair(my_last_slab, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Updating the cache if we hit the refresh point.
        if (my_counter - 1 == my_close_refresh_point) {
            if (my_all_slabs.empty()) {
                // This section only runs once, at the start, to populate the my_close_future_subset_cache.
                requisition_subset_close(slab_info.first, slab_info.second);
                decltype(my_max_slabs) used_slabs = 1;

                while (++my_close_refresh_point < my_total) {
                    auto future_index = my_oracle->get(my_close_refresh_point);
                    auto future_slab_info = identify(future_index);
                    auto cfcIt = my_close_future_subset_cache.find(future_slab_info.first);
                    if (cfcIt != my_close_future_subset_cache.end()) {
                        OracularSubsettedSlabCache_internals::add_to_details(*(cfcIt->second), future_slab_info.second);
                    } else if (used_slabs < my_max_slabs) {
                        requisition_subset_close(future_slab_info.first, future_slab_info.second);
                        ++used_slabs;
                    } else {
                        my_far_slab_id = future_slab_info.first;
                        my_far_slab_offset = future_slab_info.second;
                        break;
                    }
                }

                my_far_refresh_point = my_close_refresh_point;
            } else {
                my_close_refresh_point = my_far_refresh_point;
            }

            // Populating the far future cache.
            if (my_far_refresh_point < my_total) {
                requisition_subset_far(my_far_slab_id, my_far_slab_offset);
                decltype(my_max_slabs) used_slabs = 1;

                while (++my_far_refresh_point < my_total) {
                    auto future_index = my_oracle->get(my_far_refresh_point);
                    auto future_slab_info = identify(future_index);
                    auto ffcIt = my_far_future_subset_cache.find(future_slab_info.first);
                    if (ffcIt != my_far_future_subset_cache.end()) {
                        OracularSubsettedSlabCache_internals::add_to_details(*(ffcIt->second), future_slab_info.second);
                    } else if (used_slabs < my_max_slabs) {
                        requisition_subset_far(future_slab_info.first, future_slab_info.second);
                        ++used_slabs;
                    } else {
                        my_far_slab_id = future_slab_info.first;
                        my_far_slab_offset = future_slab_info.second;
                        break;
                    }
                }
            }

            // Reusing slabs from my_current_cache; these should all have FULL selections already.
            for (auto& cf : my_close_future_subset_cache) {
                auto cIt = my_current_cache.find(cf.first);
                if (cIt == my_current_cache.end()) {
                    my_to_reassign.emplace_back(cf.first, cf.second);
                } else {
                    my_future_cache[cf.first] = cIt->second;
                    my_current_cache.erase(cIt);
                }
            }

            // Creating new slabs for everything that's left.
            auto cIt = my_current_cache.begin();
            for (auto a : my_to_reassign) {
                Slab_* slab_ptr;
                if (cIt == my_current_cache.end()) {
                    my_all_slabs.emplace_back(create());
                    slab_ptr = &(my_all_slabs.back());
                } else {
                    slab_ptr = cIt->second;
                    ++cIt;
                }
                my_future_cache[a.first] = slab_ptr;
                OracularSubsettedSlabCache_internals::finalize_details(*(a.second));
                my_to_populate.emplace_back(a.first, slab_ptr, a.second);
            }
            my_to_reassign.clear();

            populate(my_to_populate);
            my_to_populate.clear();

            // We always fill my_future_cache to the brim so every entry of
            // my_all_slabs should be referenced by a pointer in
            // my_future_cache. There shouldn't be any free cache entries
            // remaining in my_current_cache i.e., at this point, cIt should
            // equal my_current_cache.end(), as we transferred everything to
            // my_future_cache. Thus it is safe to clear my_current_cache
            // without worrying about leaking memory. The only exception is if
            // we run out of predictions, in which case it doesn't matter.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);

            // Putting the no-longer-used subset pointers back in the free pool
            // before we swap the close and far futures.
            for (auto& cfc : my_close_future_subset_cache) {
                my_free_subset_details.push_back(cfc.second);
            }
            my_close_future_subset_cache.clear();
            my_close_future_subset_cache.swap(my_far_future_subset_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab = ccIt->second;
        return std::make_pair(my_last_slab, slab_info.second);
    }

private:
    void requisition_subset_close(Id_ slab_id, Index_ slab_offset) {
        auto selected = my_free_subset_details.back();
        OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
        my_close_future_subset_cache[slab_id] = selected;
        my_free_subset_details.pop_back();
    }

    void requisition_subset_far(Id_ slab_id, Index_ slab_offset) {
        auto selected = my_free_subset_details.back();
        OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
        my_far_future_subset_cache[slab_id] = selected;
        my_free_subset_details.pop_back();

        // If a slab from the close future is still being used in the far future, it
        // might continue to be used in an even further future, in which case we need
        // to do a FULL extraction just to be safe.
        auto cfcIt = my_close_future_subset_cache.find(slab_id);
        if (cfcIt != my_close_future_subset_cache.end()) {
            selected->selection = OracularSubsettedSlabCacheSelectionType::FULL;
            cfcIt->second->selection = OracularSubsettedSlabCacheSelectionType::FULL;
        }
    }

public:
    // Maximum number of slabs that can be stored in the cache.
    auto get_max_slabs() const {
        return my_max_slabs;
    }

    // Number of slabs currently stored in the cache.
    auto get_num_slabs() const {
        return my_current_cache.size();
    }
};

}

#endif
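
As a rough illustration of the intended call pattern (not taken from the tatami_chunked documentation), the sketch below drives the cache with a known sequence of row accesses. MySlab, fetch_chunk_rows(), scan_rows(), chunk_size and the choice of max_slabs are hypothetical stand-ins for an application's own chunked store; tatami::FixedVectorOracle is assumed to be available from tatami as a simple oracle over a fixed vector of predictions.

#include "tatami_chunked/OracularSubsettedSlabCache.hpp"
#include <memory>
#include <tuple>
#include <utility>
#include <vector>

// Hypothetical slab type: a dense buffer holding one chunk's worth of rows.
struct MySlab {
    std::vector<double> values;
};

// Hypothetical loader that reads the selected rows of one chunk into 'slab',
// honoring details.selection (see the commented sketch after the selection struct above).
void fetch_chunk_rows(int chunk_id, const tatami_chunked::OracularSubsettedSlabCacheSelectionDetails<int>& details, MySlab& slab) {
    // ... read from the application's chunked store ...
}

void scan_rows(std::vector<int> predicted_rows, int chunk_size) {
    auto total = predicted_rows.size();
    auto oracle = std::make_shared<tatami::FixedVectorOracle<int> >(std::move(predicted_rows));
    tatami_chunked::OracularSubsettedSlabCache<int, int, MySlab> cache(std::move(oracle), /* max_slabs = */ 10);

    for (decltype(total) p = 0; p < total; ++p) {
        auto res = cache.next(
            /* identify = */ [&](int row) { return std::make_pair(row / chunk_size, row % chunk_size); },
            /* create = */ [&]() { return MySlab(); },
            /* populate = */ [&](auto& to_populate) {
                for (auto& task : to_populate) {
                    // Each task is (chunk id, pointer to the slab, pointer to the selection details).
                    fetch_chunk_rows(std::get<0>(task), *std::get<2>(task), *std::get<1>(task));
                }
            }
        );
        // res.first points to the cached MySlab; res.second is the row's offset inside it.
    }
}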