tatami_chunked
Helpers to create custom chunked tatami matrices
OracularSubsettedSlabCache.hpp
#ifndef TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP
#define TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP

#include "utils.hpp"

#include <unordered_map>
#include <vector>
#include <list>
#include <cstddef>

// Standard headers for facilities used below (std::shared_ptr, std::pair,
// std::tuple, std::sort/std::is_sorted and std::iota).
#include <memory>
#include <utility>
#include <tuple>
#include <algorithm>
#include <numeric>

#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

namespace tatami_chunked {

// Type of subset selection on a slab: the entire slab, a contiguous block of
// its elements, or an arbitrary set of indices.
enum class OracularSubsettedSlabCacheSelectionType : char { FULL, BLOCK, INDEX };

// Details on the subset to extract in OracularSubsettedSlabCache.
template<typename Index_>
struct OracularSubsettedSlabCacheSelectionDetails {
    // Type of selection to extract from the slab.
    OracularSubsettedSlabCacheSelectionType selection;

    // Start of the block to extract; only relevant if selection == BLOCK.
    Index_ block_start;

    // Length of the block to extract; only relevant if selection == BLOCK.
    Index_ block_length;

    // One past the end of the block to extract; only relevant if selection == BLOCK.
    Index_ block_end;

    // Sorted and unique indices to extract; only relevant if selection == INDEX.
    std::vector<Index_> indices;

    // Position of each entry of 'indices' within 'indices' itself, i.e.,
    // mapping[indices[i]] == i; only relevant if selection == INDEX.
    std::unordered_map<Index_, Index_> mapping;
};

namespace OracularSubsettedSlabCache_internals {

// We put these functions in here as struct{} usage policy forbids methods
// (outside of the constructor). The Details class should be a passive data
// carrier only.

template<typename Index_>
void fill_mapping_in_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details) {
    auto num = details.indices.size();
    for (I<decltype(num)> i = 0; i < num; ++i) {
        details.mapping[details.indices[i]] = i;
    }
}
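
// For example, if details.indices is {5, 7, 9}, the loop above leaves
// details.mapping as {5 -> 0, 7 -> 1, 9 -> 2}, so consumers can look up the
// position of each requested index within the extracted subset.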

template<typename Index_>
void set_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i) {
    details.selection = OracularSubsettedSlabCacheSelectionType::BLOCK;
    details.block_start = i;
    details.block_end = i + 1;
    details.indices.clear();
    details.mapping.clear();
}

template<typename Index_>
void add_to_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i) {
    if (details.selection == OracularSubsettedSlabCacheSelectionType::FULL) {
        return;
    }

    if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
        if (i == details.block_end) {
            details.block_end = i + 1;
            return;

        } else if (i + 1 == details.block_start) {
            details.block_start = i;
            return;

        } else if (i >= details.block_start && i < details.block_end) {
            return;
        }

        details.selection = OracularSubsettedSlabCacheSelectionType::INDEX;

        // Don't replace this with tatami::resize_container_to_Index_size as this class might be
        // used outside of the tatami::Matrix contract (i.e., Index_ might store values beyond std::size_t).
        details.indices.resize(sanisizer::cast<I<decltype(details.indices.size())> >(details.block_end - details.block_start));
        std::iota(details.indices.begin(), details.indices.end(), details.block_start);
        fill_mapping_in_details(details);
    }

    if (details.mapping.find(i) == details.mapping.end()) {
        details.mapping[i] = details.indices.size();
        details.indices.push_back(i);
    }
}
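
// Worked example of the promotion above: after set_details(d, 5), the selection
// is BLOCK covering [5, 6). Adding 6 and then 7 just extends the block to [5, 8).
// Adding 10 cannot be expressed as a contiguous block, so the selection is
// promoted to INDEX with indices {5, 6, 7, 10} and the corresponding mapping.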

template<typename Index_>
void finalize_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details) {
    if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
        details.block_length = details.block_end - details.block_start;
    } else if (details.selection == OracularSubsettedSlabCacheSelectionType::INDEX) {
        if (!std::is_sorted(details.indices.begin(), details.indices.end())) {
            std::sort(details.indices.begin(), details.indices.end());
            fill_mapping_in_details(details);
        }
    }
}

} // namespace OracularSubsettedSlabCache_internals

// Oracle-aware cache for slabs, plus subsets. For each cached slab, this also
// tracks the subset of its elements that the oracle's predictions actually
// touch, so that costly extraction can be restricted to that subset.
template<typename Id_, typename Index_, class Slab_>
class OracularSubsettedSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    tatami::PredictionIndex my_total;
    tatami::PredictionIndex my_counter = 0;

    Index_ my_last_slab_id = 0;
    Slab_* my_last_slab = NULL;

    typedef std::vector<Slab_> SlabPool;
    typename SlabPool::size_type my_max_slabs;
    SlabPool my_all_slabs;
    std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;

    std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_> > my_all_subset_details;
    std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_free_subset_details;
    std::unordered_map<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_close_future_subset_cache, my_far_future_subset_cache;

    tatami::PredictionIndex my_close_refresh_point = 0;
    tatami::PredictionIndex my_far_refresh_point = 0;
    Id_ my_far_slab_id;
    Index_ my_far_slab_offset;

    std::vector<std::pair<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_reassign;
    std::vector<std::tuple<Id_, Slab_*, const OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_populate;

public:
    // oracle: predicts the sequence of elements to be accessed.
    // max_slabs: maximum number of slabs to hold in the cache at any one time.
    template<typename MaxSlabs_>
    OracularSubsettedSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, MaxSlabs_ max_slabs) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_slabs(sanisizer::cast<I<decltype(my_max_slabs)> >(max_slabs))
    {
        my_all_slabs.reserve(max_slabs);
        my_current_cache.reserve(max_slabs);
        my_future_cache.reserve(max_slabs);
        my_close_future_subset_cache.reserve(max_slabs);
        my_far_future_subset_cache.reserve(max_slabs);

        my_all_subset_details.resize(sanisizer::product<I<decltype(my_all_subset_details.size())> >(2, max_slabs));
        for (auto& as : my_all_subset_details) {
            my_free_subset_details.push_back(&as);
        }
    }

    // Copying is deleted, as the copied pointers would still refer to the
    // original object's containers.
    OracularSubsettedSlabCache(const OracularSubsettedSlabCache&) = delete;
    OracularSubsettedSlabCache& operator=(const OracularSubsettedSlabCache&) = delete;

    // Move operators are still okay as pointers still point to the moved vectors,
    // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
    OracularSubsettedSlabCache(OracularSubsettedSlabCache&&) = default;
    OracularSubsettedSlabCache& operator=(OracularSubsettedSlabCache&&) = default;

    // Might as well define this.
    ~OracularSubsettedSlabCache() = default;

public:
    // Advance the oracle and return its next prediction.
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
    // Fetch the slab containing the next predicted element.
    //
    // 'identify' takes a predicted element index and returns a pair containing
    // (i) the identifier of the slab holding that element and (ii) the offset of
    // the element inside that slab. 'create' takes no arguments and returns a
    // newly allocated Slab_. 'populate' takes a vector of (slab identifier, slab
    // pointer, subset details) tuples and fills each slab with the requested
    // subset of its contents.
    //
    // Returns a pair containing a pointer to the cached slab and the offset of
    // the next predicted element inside that slab.
    template<class Ifunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);
        if (slab_info.first == my_last_slab_id && my_last_slab) {
            return std::make_pair(my_last_slab, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Updating the cache if we hit the refresh point.
        if (my_counter - 1 == my_close_refresh_point) {
            if (my_all_slabs.empty()) {
                // This section only runs once, at the start, to populate the my_close_future_subset_cache.
                requisition_subset_close(slab_info.first, slab_info.second);
                I<decltype(my_max_slabs)> used_slabs = 1;

                while (++my_close_refresh_point < my_total) {
                    auto future_index = my_oracle->get(my_close_refresh_point);
                    auto future_slab_info = identify(future_index);
                    auto cfcIt = my_close_future_subset_cache.find(future_slab_info.first);
                    if (cfcIt != my_close_future_subset_cache.end()) {
                        OracularSubsettedSlabCache_internals::add_to_details(*(cfcIt->second), future_slab_info.second);
                    } else if (used_slabs < my_max_slabs) {
                        requisition_subset_close(future_slab_info.first, future_slab_info.second);
                        ++used_slabs;
                    } else {
                        my_far_slab_id = future_slab_info.first;
                        my_far_slab_offset = future_slab_info.second;
                        break;
                    }
                }

                my_far_refresh_point = my_close_refresh_point;
            } else {
                my_close_refresh_point = my_far_refresh_point;
            }

            // Populating the far future cache.
            if (my_far_refresh_point < my_total) {
                requisition_subset_far(my_far_slab_id, my_far_slab_offset);
                I<decltype(my_max_slabs)> used_slabs = 1;

                while (++my_far_refresh_point < my_total) {
                    auto future_index = my_oracle->get(my_far_refresh_point);
                    auto future_slab_info = identify(future_index);
                    auto ffcIt = my_far_future_subset_cache.find(future_slab_info.first);
                    if (ffcIt != my_far_future_subset_cache.end()) {
                        OracularSubsettedSlabCache_internals::add_to_details(*(ffcIt->second), future_slab_info.second);
                    } else if (used_slabs < my_max_slabs) {
                        requisition_subset_far(future_slab_info.first, future_slab_info.second);
                        ++used_slabs;
                    } else {
                        my_far_slab_id = future_slab_info.first;
                        my_far_slab_offset = future_slab_info.second;
                        break;
                    }
                }
            }

            // Reusing slabs from my_current_cache; these should all have FULL selections already.
            for (auto& cf : my_close_future_subset_cache) {
                auto cIt = my_current_cache.find(cf.first);
                if (cIt == my_current_cache.end()) {
                    my_to_reassign.emplace_back(cf.first, cf.second);
                } else {
                    my_future_cache[cf.first] = cIt->second;
                    my_current_cache.erase(cIt);
                }
            }

            // Creating new slabs for everything that's left.
            auto cIt = my_current_cache.begin();
            for (auto a : my_to_reassign) {
                Slab_* slab_ptr;
                if (cIt == my_current_cache.end()) {
                    my_all_slabs.emplace_back(create());
                    slab_ptr = &(my_all_slabs.back());
                } else {
                    slab_ptr = cIt->second;
                    ++cIt;
                }
                my_future_cache[a.first] = slab_ptr;
                OracularSubsettedSlabCache_internals::finalize_details(*(a.second));
                my_to_populate.emplace_back(a.first, slab_ptr, a.second);
            }
            my_to_reassign.clear();

            populate(my_to_populate);
            my_to_populate.clear();

            // We always fill my_future_cache to the brim so every entry of
            // my_all_slabs should be referenced by a pointer in
            // my_future_cache. There shouldn't be any free cache entries
            // remaining in my_current_cache, i.e., at this point, cIt should
            // equal my_current_cache.end(), as we transferred everything to
            // my_future_cache. Thus it is safe to clear my_current_cache
            // without worrying about leaking memory. The only exception is if
            // we run out of predictions, in which case it doesn't matter.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);

            // Putting the no-longer-used subset pointers back in the free pool
            // before we swap the close and far futures.
            for (auto& cfc : my_close_future_subset_cache) {
                my_free_subset_details.push_back(cfc.second);
            }
            my_close_future_subset_cache.clear();
            my_close_future_subset_cache.swap(my_far_future_subset_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab = ccIt->second;
        return std::make_pair(my_last_slab, slab_info.second);
    }

private:
    void requisition_subset_close(Id_ slab_id, Index_ slab_offset) {
        auto selected = my_free_subset_details.back();
        OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
        my_close_future_subset_cache[slab_id] = selected;
        my_free_subset_details.pop_back();
    }

    void requisition_subset_far(Id_ slab_id, Index_ slab_offset) {
        auto selected = my_free_subset_details.back();
        OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
        my_far_future_subset_cache[slab_id] = selected;
        my_free_subset_details.pop_back();

        // If a slab is still being used in the far future, it might continue
        // to be used in an even further future, in which case we need to do a
        // FULL extraction just to be safe.
        auto cfcIt = my_close_future_subset_cache.find(slab_id);
        if (cfcIt != my_close_future_subset_cache.end()) {
            selected->selection = OracularSubsettedSlabCacheSelectionType::FULL;
            cfcIt->second->selection = OracularSubsettedSlabCacheSelectionType::FULL;
        }
    }

public:
    // Maximum number of slabs that can be held in the cache.
    auto get_max_slabs() const {
        return my_max_slabs;
    }

    // Number of slabs currently held in the cache.
    auto get_num_slabs() const {
        return my_current_cache.size();
    }
};

} // namespace tatami_chunked

#endif
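
To illustrate how the three callbacks of next() fit together, here is a minimal
usage sketch. The DenseSlab type, the chunk_length/num_columns parameters, the
load_row_into_slab() helper and the fetch_next_row() wrapper are all hypothetical
stand-ins for a real chunked backend; only the OracularSubsettedSlabCache calls
and the selection types come from the header above. The populate callback shows
the intended pattern: inspect each OracularSubsettedSlabCacheSelectionDetails and
extract the full slab, a contiguous block, or an arbitrary set of indices accordingly.

#include <cstddef>
#include <tuple>
#include <utility>
#include <vector>

#include "tatami_chunked/OracularSubsettedSlabCache.hpp" // assumed include path

// A toy slab: a dense buffer holding one chunk's worth of rows.
struct DenseSlab {
    std::vector<double> values;
};

// Hypothetical stand-in for real chunk I/O: fill one row of the slab buffer.
inline void load_row_into_slab(int chunk_id, int row_in_chunk, int num_columns, DenseSlab& slab) {
    double* dest = slab.values.data() + static_cast<std::size_t>(row_in_chunk) * num_columns;
    for (int c = 0; c < num_columns; ++c) {
        dest[c] = chunk_id * 1000.0 + row_in_chunk + c * 0.001;
    }
}

// Fetch the next predicted row, loading only the rows that the oracle says will be needed.
inline std::pair<const DenseSlab*, int> fetch_next_row(
    tatami_chunked::OracularSubsettedSlabCache<int, int, DenseSlab>& cache,
    int chunk_length,
    int num_columns)
{
    return cache.next(
        // identify: map a row index to (chunk ID, offset within the chunk).
        [&](int row) { return std::make_pair(row / chunk_length, row % chunk_length); },

        // create: allocate a new slab buffer.
        [&]() {
            DenseSlab slab;
            slab.values.resize(static_cast<std::size_t>(chunk_length) * num_columns);
            return slab;
        },

        // populate: fill each requisitioned slab, restricted to the requested subset.
        [&](const auto& to_populate) {
            for (const auto& task : to_populate) {
                int chunk_id = std::get<0>(task);
                DenseSlab* slab = std::get<1>(task);
                const auto* details = std::get<2>(task);

                switch (details->selection) {
                case tatami_chunked::OracularSubsettedSlabCacheSelectionType::FULL:
                    for (int r = 0; r < chunk_length; ++r) {
                        load_row_into_slab(chunk_id, r, num_columns, *slab);
                    }
                    break;
                case tatami_chunked::OracularSubsettedSlabCacheSelectionType::BLOCK:
                    for (int r = 0; r < details->block_length; ++r) {
                        load_row_into_slab(chunk_id, details->block_start + r, num_columns, *slab);
                    }
                    break;
                case tatami_chunked::OracularSubsettedSlabCacheSelectionType::INDEX:
                    for (int r : details->indices) {
                        load_row_into_slab(chunk_id, r, num_columns, *slab);
                    }
                    break;
                }
            }
        }
    );
}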