tatami_chunked
Helpers to create custom chunked tatami matrices
Loading...
Searching...
No Matches
OracularSubsettedSlabCache.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP
2#define TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP
3
4#include <unordered_map>
5#include <vector>
6#include <list>
7#include <cstddef>
8
9#include "tatami/tatami.hpp"
10#include "sanisizer/sanisizer.hpp"
11
17namespace tatami_chunked {
18
27enum class OracularSubsettedSlabCacheSelectionType : char { FULL, BLOCK, INDEX };
28
33template<typename Index_>
39
48
54
60 Index_ block_end;
61
70 std::vector<Index_> indices;
71
77 std::unordered_map<Index_, Index_> mapping;
78};
79
83namespace OracularSubsettedSlabCache_internals {
84
85// We put these functions in here as struct{} usage policy forbids methods
86// (outside of the constructor). The Details class should be a passive data
87// carrier only.
88
89template<typename Index_>
90void fill_mapping_in_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details) {
91 auto num = details.indices.size();
92 for (decltype(num) i = 0; i < num; ++i) {
93 details.mapping[details.indices[i]] = i;
94 }
95}
96
97template<typename Index_>
98void set_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i) {
99 details.selection = OracularSubsettedSlabCacheSelectionType::BLOCK;
100 details.block_start = i;
101 details.block_end = i + 1;
102 details.indices.clear();
103 details.mapping.clear();
104}
105
106template<typename Index_>
107void add_to_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i) {
108 if (details.selection == OracularSubsettedSlabCacheSelectionType::FULL) {
109 return;
110 }
111
112 if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
113 if (i == details.block_end) {
114 details.block_end = i + 1;
115 return;
116
117 } else if (i + 1 == details.block_start) {
118 details.block_start = i;
119 return;
120
121 } else if (i >= details.block_start && i < details.block_end) {
122 return;
123 }
124
125 details.selection = OracularSubsettedSlabCacheSelectionType::INDEX;
126
127 // Don't replace this with tatami::resize_container_to_Index_size as this class might be used outside of the tatami::Matrix contract (i.e., Index_ might store values beyond std::size_t).
128 details.indices.resize(sanisizer::cast<decltype(details.indices.size())>(details.block_end - details.block_start));
129 std::iota(details.indices.begin(), details.indices.end(), details.block_start);
130 fill_mapping_in_details(details);
131 }
132
133 if (details.mapping.find(i) == details.mapping.end()) {
134 details.mapping[i] = details.indices.size();
135 details.indices.push_back(i);
136 }
137}
138
139template<typename Index_>
140void finalize_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details) {
141 if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
142 details.block_length = details.block_end - details.block_start;
143 } else if (details.selection == OracularSubsettedSlabCacheSelectionType::INDEX) {
144 if (!std::is_sorted(details.indices.begin(), details.indices.end())) {
145 std::sort(details.indices.begin(), details.indices.end());
146 fill_mapping_in_details(details);
147 }
148 }
149}
150
151}
168template<typename Id_, typename Index_, class Slab_>
170private:
171 std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
173 tatami::PredictionIndex my_counter = 0;
174
175 Index_ my_last_slab_id = 0;
176 Slab_* my_last_slab = NULL;
177
178 typedef std::vector<Slab_> SlabPool;
179 typename SlabPool::size_type my_max_slabs;
180 SlabPool my_all_slabs;
181 std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;
182
183 std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_> > my_all_subset_details;
184 std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_free_subset_details;
185 std::unordered_map<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_close_future_subset_cache, my_far_future_subset_cache;
186
187 tatami::PredictionIndex my_close_refresh_point = 0;
188 tatami::PredictionIndex my_far_refresh_point = 0;
189 Id_ my_far_slab_id;
190 Index_ my_far_slab_offset;
191
192 std::vector<std::pair<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_reassign;
193 std::vector<std::tuple<Id_, Slab_*, const OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_populate;
194
195public:
201 template<typename MaxSlabs_>
202 OracularSubsettedSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, MaxSlabs_ max_slabs) :
203 my_oracle(std::move(oracle)),
204 my_total(my_oracle->total()),
205 my_max_slabs(sanisizer::cast<decltype(my_max_slabs)>(max_slabs))
206 {
207 my_all_slabs.reserve(max_slabs);
208 my_current_cache.reserve(max_slabs);
209 my_future_cache.reserve(max_slabs);
210 my_close_future_subset_cache.reserve(max_slabs);
211 my_far_future_subset_cache.reserve(max_slabs);
212
213 my_all_subset_details.resize(sanisizer::product<decltype(my_all_subset_details.size())>(2, max_slabs));
214 for (auto& as : my_all_subset_details) {
215 my_free_subset_details.push_back(&as);
216 }
217 }
218
223
228
232 // Move operators are still okay as pointers still point to the moved vectors,
233 // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
236
237 // Might as well define this.
238 ~OracularSubsettedSlabCache() = default;
243public:
250 Index_ next() {
251 return my_oracle->get(my_counter++);
252 }
253
254public:
281 template<class Ifunction_, class Cfunction_, class Pfunction_>
282 std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
283 Index_ index = this->next();
284 auto slab_info = identify(index);
285 if (slab_info.first == my_last_slab_id && my_last_slab) {
286 return std::make_pair(my_last_slab, slab_info.second);
287 }
288 my_last_slab_id = slab_info.first;
289
290 // Updating the cache if we hit the refresh point.
291 if (my_counter - 1 == my_close_refresh_point) {
292 if (my_all_slabs.empty()) {
293 // This section only runs once, at the start, to populate the my_close_future_subset_cache.
294 requisition_subset_close(slab_info.first, slab_info.second);
295 decltype(my_max_slabs) used_slabs = 1;
296
297 while (++my_close_refresh_point < my_total) {
298 auto future_index = my_oracle->get(my_close_refresh_point);
299 auto future_slab_info = identify(future_index);
300 auto cfcIt = my_close_future_subset_cache.find(future_slab_info.first);
301 if (cfcIt != my_close_future_subset_cache.end()) {
302 OracularSubsettedSlabCache_internals::add_to_details(*(cfcIt->second), future_slab_info.second);
303 } else if (used_slabs < my_max_slabs) {
304 requisition_subset_close(future_slab_info.first, future_slab_info.second);
305 ++used_slabs;
306 } else {
307 my_far_slab_id = future_slab_info.first;
308 my_far_slab_offset = future_slab_info.second;
309 break;
310 }
311 }
312
313 my_far_refresh_point = my_close_refresh_point;
314 } else {
315 my_close_refresh_point = my_far_refresh_point;
316 }
317
318 // Populating the far future cache.
319 if (my_far_refresh_point < my_total) {
320 requisition_subset_far(my_far_slab_id, my_far_slab_offset);
321 decltype(my_max_slabs) used_slabs = 1;
322
323 while (++my_far_refresh_point < my_total) {
324 auto future_index = my_oracle->get(my_far_refresh_point);
325 auto future_slab_info = identify(future_index);
326 auto ffcIt = my_far_future_subset_cache.find(future_slab_info.first);
327 if (ffcIt != my_far_future_subset_cache.end()) {
328 OracularSubsettedSlabCache_internals::add_to_details(*(ffcIt->second), future_slab_info.second);
329 } else if (used_slabs < my_max_slabs) {
330 requisition_subset_far(future_slab_info.first, future_slab_info.second);
331 ++used_slabs;
332 } else {
333 my_far_slab_id = future_slab_info.first;
334 my_far_slab_offset = future_slab_info.second;
335 break;
336 }
337 }
338 }
339
340 // Reusing slabs from my_current_cache; these should all have FULL selections already.
341 for (auto& cf : my_close_future_subset_cache) {
342 auto cIt = my_current_cache.find(cf.first);
343 if (cIt == my_current_cache.end()) {
344 my_to_reassign.emplace_back(cf.first, cf.second);
345 } else {
346 my_future_cache[cf.first] = cIt->second;
347 my_current_cache.erase(cIt);
348 }
349 }
350
351 // Creating new slabs for everything that's left.
352 auto cIt = my_current_cache.begin();
353 for (auto a : my_to_reassign) {
354 Slab_* slab_ptr;
355 if (cIt == my_current_cache.end()) {
356 my_all_slabs.emplace_back(create());
357 slab_ptr = &(my_all_slabs.back());
358 } else {
359 slab_ptr = cIt->second;
360 ++cIt;
361 }
362 my_future_cache[a.first] = slab_ptr;
363 OracularSubsettedSlabCache_internals::finalize_details(*(a.second));
364 my_to_populate.emplace_back(a.first, slab_ptr, a.second);
365 }
366 my_to_reassign.clear();
367
368 populate(my_to_populate);
369 my_to_populate.clear();
370
371 // We always fill my_future_cache to the brim so every entry of
372 // my_all_slabs should be referenced by a pointer in
373 // my_future_cache. There shouldn't be any free cache entries
374 // remaining in my_current_cache i.e., at this point, cIt should
375 // equal my_current_cache.end(), as we transferred everything to
376 // my_future_cache. Thus it is safe to clear my_current_cache
377 // without worrying about leaking memory. The only exception is if
378 // we run out of predictions, in which case it doesn't matter.
379 my_current_cache.clear();
380 my_current_cache.swap(my_future_cache);
381
382 // Putting the no-longer-used subset pointers back in the free pool
383 // before we swap the close and far futures.
384 for (auto& cfc : my_close_future_subset_cache) {
385 my_free_subset_details.push_back(cfc.second);
386 }
387 my_close_future_subset_cache.clear();
388 my_close_future_subset_cache.swap(my_far_future_subset_cache);
389 }
390
391 // We know it must exist, so no need to check ccIt's validity.
392 auto ccIt = my_current_cache.find(slab_info.first);
393 my_last_slab = ccIt->second;
394 return std::make_pair(my_last_slab, slab_info.second);
395 }
396
397private:
398 void requisition_subset_close(Id_ slab_id, Index_ slab_offset) {
399 auto selected = my_free_subset_details.back();
400 OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
401 my_close_future_subset_cache[slab_id] = selected;
402 my_free_subset_details.pop_back();
403 }
404
405 void requisition_subset_far(Id_ slab_id, Index_ slab_offset) {
406 auto selected = my_free_subset_details.back();
407 OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
408 my_far_future_subset_cache[slab_id] = selected;
409 my_free_subset_details.pop_back();
410
411 // If a slab is still being used in the far future, it might continue
412 // to be used in an even further future, in which case we need to do a
413 // FULL extraction just to be safe.
414 auto cfcIt = my_close_future_subset_cache.find(slab_id);
415 if (cfcIt != my_close_future_subset_cache.end()) {
416 selected->selection = OracularSubsettedSlabCacheSelectionType::FULL;
417 cfcIt->second->selection = OracularSubsettedSlabCacheSelectionType::FULL;
418 }
419 }
420
421public:
426 auto get_max_slabs() const {
427 return my_max_slabs;
428 }
429
434 auto get_num_slabs() const {
435 return my_current_cache.size();
436 }
437};
438
439}
440
441#endif
Oracle-aware cache for slabs, plus subsets.
Definition OracularSubsettedSlabCache.hpp:169
auto get_max_slabs() const
Definition OracularSubsettedSlabCache.hpp:426
auto get_num_slabs() const
Definition OracularSubsettedSlabCache.hpp:434
OracularSubsettedSlabCache & operator=(const OracularSubsettedSlabCache &)=delete
std::pair< const Slab_ *, Index_ > next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate)
Definition OracularSubsettedSlabCache.hpp:282
OracularSubsettedSlabCache(const OracularSubsettedSlabCache &)=delete
Index_ next()
Definition OracularSubsettedSlabCache.hpp:250
OracularSubsettedSlabCache(std::shared_ptr< const tatami::Oracle< Index_ > > oracle, MaxSlabs_ max_slabs)
Definition OracularSubsettedSlabCache.hpp:202
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:11
OracularSubsettedSlabCacheSelectionType
Definition OracularSubsettedSlabCache.hpp:27
std::size_t PredictionIndex
Details on the subset to extract in OracularSubsettedSlabCache.
Definition OracularSubsettedSlabCache.hpp:34
Index_ block_end
Definition OracularSubsettedSlabCache.hpp:60
Index_ block_length
Definition OracularSubsettedSlabCache.hpp:53
std::vector< Index_ > indices
Definition OracularSubsettedSlabCache.hpp:70
OracularSubsettedSlabCacheSelectionType selection
Definition OracularSubsettedSlabCache.hpp:38
Index_ block_start
Definition OracularSubsettedSlabCache.hpp:47
std::unordered_map< Index_, Index_ > mapping
Definition OracularSubsettedSlabCache.hpp:77