tatami_chunked
Helpers to create custom chunked tatami matrices
Loading...
Searching...
No Matches
OracularSubsettedSlabCache.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP
2#define TATAMI_CHUNKED_SUBSETTED_ORACLE_SLAB_CACHE_HPP
3
#include <algorithm>     // std::sort, std::is_sorted (selection finalization)
#include <list>
#include <numeric>       // std::iota (BLOCK -> INDEX conversion)
#include <unordered_map>
#include <vector>

#include "tatami/tatami.hpp"
8
14namespace tatami_chunked {
15
// Kind of subset to extract for a cached slab: FULL = the whole extent,
// BLOCK = a contiguous half-open run [block_start, block_end),
// INDEX = an arbitrary set of indices (sorted on finalization).
24enum class OracularSubsettedSlabCacheSelectionType : char { FULL, BLOCK, INDEX };
25
30template<typename Index_>
76
81
82// We put these functions in here as struct{} usage policy forbids methods
83// (outside of the constructor). The Details class should be a passive data
84// carrier only.
85
86template<typename Index_>
// NOTE(review): the function signature (original line 87) is missing from this
// extraction; from the body it takes an
// OracularSubsettedSlabCacheSelectionDetails<Index_>& and rebuilds 'mapping' so
// that mapping[index] gives that index's position within 'indices'.
 88 for (size_t i = 0, end = details.indices.size(); i < end; ++i) {
 89 details.mapping[details.indices[i]] = i;
 90 }
 91}
92
93template<typename Index_>
// NOTE(review): the signature (original line 94) is missing from this extraction;
// judging by the call sites it is
//   set_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i)
// which resets 'details' to a BLOCK selection covering the single element 'i'.
 95 details.selection = OracularSubsettedSlabCacheSelectionType::BLOCK;
 96 details.block_start = i;
// Half-open upper bound: the block currently covers [i, i + 1).
 97 details.block_end = i + 1;
// Discard any leftover INDEX-mode state from a previous use of this object.
 98 details.indices.clear();
 99 details.mapping.clear();
 100}
101
102template<typename Index_>
// NOTE(review): the signature (original line 103) is missing from this extraction;
// from the call sites it is
//   add_to_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details, Index_ i)
// which widens the current selection so that it also covers element 'i'.
 104 if (details.selection == OracularSubsettedSlabCacheSelectionType::FULL) {
// A FULL selection already covers every element, so there is nothing to add.
 105 return;
 106 }
 107
 108 if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
// Stay in BLOCK mode if 'i' extends the half-open range [block_start, block_end)
// at either end, or already lies inside it.
 109 if (i == details.block_end) {
 110 details.block_end = i + 1;
 111 return;
 112
 113 } else if (i + 1 == details.block_start) {
 114 details.block_start = i;
 115 return;
 116
 117 } else if (i >= details.block_start && i < details.block_end) {
 118 return;
 119 }
 120
// 'i' is neither adjacent to nor inside the block, so degrade to INDEX mode,
// seeding 'indices' with the block's current contents.
 121 details.selection = OracularSubsettedSlabCacheSelectionType::INDEX;
 122 details.indices.resize(details.block_end - details.block_start);
 123 std::iota(details.indices.begin(), details.indices.end(), details.block_start);
// NOTE(review): original line 124 is missing here; presumably it initializes
// 'mapping' from the freshly seeded 'indices' (cf. the helper at original
// lines 87-91) — confirm against the upstream source.
 125 }
 126
// INDEX mode: record 'i' only if it has not been seen before. 'mapping' stores
// each index's position within 'indices' (which may be unsorted at this point).
 127 if (details.mapping.find(i) == details.mapping.end()) {
 128 details.mapping[i] = details.indices.size();
 129 details.indices.push_back(i);
 130 }
 131}
132
133template<typename Index_>
// NOTE(review): the signature (original line 134) is missing from this extraction;
// from the call sites it is
//   finalize_details(OracularSubsettedSlabCacheSelectionDetails<Index_>& details)
// called once all additions are done, to derive the block length or sort the
// collected indices before extraction.
 135 if (details.selection == OracularSubsettedSlabCacheSelectionType::BLOCK) {
 136 details.block_length = details.block_end - details.block_start;
 137 } else if (details.selection == OracularSubsettedSlabCacheSelectionType::INDEX) {
// Only sort when needed; skips the O(n log n) work if additions were in order.
 138 if (!std::is_sorted(details.indices.begin(), details.indices.end())) {
 139 std::sort(details.indices.begin(), details.indices.end());
// NOTE(review): original line 140 is missing here; sorting invalidates the
// positions stored in 'mapping', so it presumably rebuilds 'mapping' (cf. the
// helper at original lines 87-91) — confirm against the upstream source.
 141 }
 142 }
 143}
144
145}
163template<typename Id_, typename Index_, class Slab_>
165private:
166 std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
167 size_t my_total;
168 size_t my_counter = 0;
169
170 Index_ my_last_slab_id = 0;
171 Slab_* my_last_slab = NULL;
172
173 size_t my_max_slabs;
174 std::vector<Slab_> my_all_slabs;
175 std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;
176
177 std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_> > my_all_subset_details;
178 std::vector<OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_free_subset_details;
179 std::unordered_map<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> my_close_future_subset_cache, my_far_future_subset_cache;
180 size_t my_close_refresh_point = 0;
181 size_t my_far_refresh_point = 0;
182 Id_ my_far_slab_id;
183 Index_ my_far_slab_offset;
184
185 std::vector<std::pair<Id_, OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_reassign;
186 std::vector<std::tuple<Id_, Slab_*, const OracularSubsettedSlabCacheSelectionDetails<Index_>*> > my_to_populate;
187
188public:
// NOTE(review): the constructor declaration (original line 193) is missing from
// this extraction; per the documentation index it is
//   OracularSubsettedSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, size_t max_slabs)
 194 my_oracle(std::move(oracle)),
// Cache the total number of predictions up front.
 195 my_total(my_oracle->total()),
 196 my_max_slabs(max_slabs)
 197 {
// Reserve all containers up front; in particular, reserving my_all_slabs avoids
// reallocations that would invalidate the Slab_* pointers held by the caches.
 198 my_all_slabs.reserve(max_slabs);
 199 my_current_cache.reserve(max_slabs);
 200 my_future_cache.reserve(max_slabs);
 201 my_close_future_subset_cache.reserve(max_slabs);
 202 my_far_future_subset_cache.reserve(max_slabs);
 203
// Two details objects per slab (close-future + far-future), all of which start
// out in the free pool.
 204 my_all_subset_details.resize(max_slabs * 2);
 205 for (auto& as : my_all_subset_details) {
 206 my_free_subset_details.push_back(&as);
 207 }
 208 }
209
214
// NOTE(review): original lines 209-226 are only partially present in this
// extraction. Per the documentation index, the copy constructor and copy
// assignment operator are '= delete'd here, while the move operations remain
// usable (see the comment below).
219
223 // Move operators are still okay as pointers still point to the moved vectors,
224 // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
227
228 // Might as well define this.
229 ~OracularSubsettedSlabCache() = default;
234public:
// NOTE(review): the declaration of this overload (original line 241) is missing;
// per the documentation index it is 'Index_ next()'. It returns the oracle's
// prediction at the current position and advances the internal counter.
 242 return my_oracle->get(my_counter++);
 243 }
244
245public:
// Obtain the slab (and offset within it) for the next predicted element.
// 'identify' maps an element index to a (slab id, within-slab offset) pair;
// 'create' constructs an empty Slab_; 'populate' fills a batch of slabs, each
// paired with the subset details describing what must be extracted.
272 template<class Ifunction_, class Cfunction_, class Pfunction_>
273 std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
274 Index_ index = this->next();
275 auto slab_info = identify(index);
// Fast path: same slab as the previous call.
276 if (slab_info.first == my_last_slab_id && my_last_slab) {
277 return std::make_pair(my_last_slab, slab_info.second);
278 }
279 my_last_slab_id = slab_info.first;
280
281 // Updating the cache if we hit the refresh point.
282 if (my_counter - 1 == my_close_refresh_point) {
283 if (my_all_slabs.empty()) {
284 // This section only runs once, at the start, to populate the my_close_future_subset_cache.
285 requisition_subset_close(slab_info.first, slab_info.second);
286 size_t used_slabs = 1;
287
// Scan ahead through the oracle's predictions, accumulating subset details per
// slab until we run out of predictions or out of slab capacity.
288 while (++my_close_refresh_point < my_total) {
289 auto future_index = my_oracle->get(my_close_refresh_point);
// NOTE(review): original line 290 is missing here; presumably it is
// 'auto future_slab_info = identify(future_index);' — confirm upstream.
291 auto cfcIt = my_close_future_subset_cache.find(future_slab_info.first);
292 if (cfcIt != my_close_future_subset_cache.end()) {
293 OracularSubsettedSlabCache_internals::add_to_details(*(cfcIt->second), future_slab_info.second);
294 } else if (used_slabs < my_max_slabs) {
295 requisition_subset_close(future_slab_info.first, future_slab_info.second);
296 ++used_slabs;
297 } else {
// Capacity exhausted: remember where the far-future cycle starts.
298 my_far_slab_id = future_slab_info.first;
299 my_far_slab_offset = future_slab_info.second;
300 break;
301 }
302 }
303
304 my_far_refresh_point = my_close_refresh_point;
305 } else {
// Steady state: the previously computed far-future horizon becomes the new
// close-future horizon.
306 my_close_refresh_point = my_far_refresh_point;
307 }
308
309 // Populating the far future cache.
310 if (my_far_refresh_point < my_total) {
311 requisition_subset_far(my_far_slab_id, my_far_slab_offset);
312 size_t used_slabs = 1;
313
314 while (++my_far_refresh_point < my_total) {
315 auto future_index = my_oracle->get(my_far_refresh_point);
// NOTE(review): original line 316 is missing here; presumably it is
// 'auto future_slab_info = identify(future_index);' — confirm upstream.
317 auto ffcIt = my_far_future_subset_cache.find(future_slab_info.first);
318 if (ffcIt != my_far_future_subset_cache.end()) {
319 OracularSubsettedSlabCache_internals::add_to_details(*(ffcIt->second), future_slab_info.second);
320 } else if (used_slabs < my_max_slabs) {
321 requisition_subset_far(future_slab_info.first, future_slab_info.second);
322 ++used_slabs;
323 } else {
324 my_far_slab_id = future_slab_info.first;
325 my_far_slab_offset = future_slab_info.second;
326 break;
327 }
328 }
329 }
330
331 // Reusing slabs from my_current_cache; these should all have FULL selections already.
332 for (auto& cf : my_close_future_subset_cache) {
333 auto cIt = my_current_cache.find(cf.first);
334 if (cIt == my_current_cache.end()) {
335 my_to_reassign.emplace_back(cf.first, cf.second);
336 } else {
337 my_future_cache[cf.first] = cIt->second;
338 my_current_cache.erase(cIt);
339 }
340 }
341
342 // Creating new slabs for everything that's left.
343 auto cIt = my_current_cache.begin();
344 for (auto a : my_to_reassign) {
// NOTE(review): original line 345 is missing here; presumably it declares
// 'Slab_* slab_ptr;' (assigned in both branches below) — confirm upstream.
346 if (cIt == my_current_cache.end()) {
347 my_all_slabs.emplace_back(create());
348 slab_ptr = &(my_all_slabs.back());
349 } else {
// Recycle a slab left over in the current cache rather than allocating anew.
350 slab_ptr = cIt->second;
351 ++cIt;
352 }
353 my_future_cache[a.first] = slab_ptr;
354 OracularSubsettedSlabCache_internals::finalize_details(*(a.second));
355 my_to_populate.emplace_back(a.first, slab_ptr, a.second);
356 }
357 my_to_reassign.clear();
358
// Batch-fill all newly assigned slabs in one callback invocation.
359 populate(my_to_populate);
360 my_to_populate.clear();
361
362 // We always fill my_future_cache to the brim so every entry of
363 // my_all_slabs should be referenced by a pointer in
364 // my_future_cache. There shouldn't be any free cache entries
365 // remaining in my_current_cache i.e., at this point, cIt should
366 // equal my_current_cache.end(), as we transferred everything to
367 // my_future_cache. Thus it is safe to clear my_current_cache
368 // without worrying about leaking memory. The only exception is if
369 // we run out of predictions, in which case it doesn't matter.
370 my_current_cache.clear();
371 my_current_cache.swap(my_future_cache);
372
373 // Putting the no-longer-used subset pointers back in the free pool
374 // before we swap the close and far futures.
375 for (auto& cfc : my_close_future_subset_cache) {
376 my_free_subset_details.push_back(cfc.second);
377 }
378 my_close_future_subset_cache.clear();
379 my_close_future_subset_cache.swap(my_far_future_subset_cache);
380 }
381
382 // We know it must exist, so no need to check ccIt's validity.
383 auto ccIt = my_current_cache.find(slab_info.first);
384 my_last_slab = ccIt->second;
385 return std::make_pair(my_last_slab, slab_info.second);
386 }
387
388private:
389 void requisition_subset_close(Id_ slab_id, Index_ slab_offset) {
390 auto selected = my_free_subset_details.back();
391 OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
392 my_close_future_subset_cache[slab_id] = selected;
393 my_free_subset_details.pop_back();
394 }
395
396 void requisition_subset_far(Id_ slab_id, Index_ slab_offset) {
397 auto selected = my_free_subset_details.back();
398 OracularSubsettedSlabCache_internals::set_details(*selected, slab_offset);
399 my_far_future_subset_cache[slab_id] = selected;
400 my_free_subset_details.pop_back();
401
402 // If a slab is still being used in the far future, it might continue
403 // to be used in an even further future, in which case we need to do a
404 // FULL extraction just to be safe.
405 auto cfcIt = my_close_future_subset_cache.find(slab_id);
406 if (cfcIt != my_close_future_subset_cache.end()) {
407 selected->selection = OracularSubsettedSlabCacheSelectionType::FULL;
408 cfcIt->second->selection = OracularSubsettedSlabCacheSelectionType::FULL;
409 }
410 }
411
412public:
416 size_t get_max_slabs() const {
417 return my_max_slabs;
418 }
419
423 size_t get_num_slabs() const {
424 return my_current_cache.size();
425 }
426};
427
428}
429
430#endif
Oracle-aware cache for slabs, plus subsets.
Definition OracularSubsettedSlabCache.hpp:164
OracularSubsettedSlabCache(std::shared_ptr< const tatami::Oracle< Index_ > > oracle, size_t max_slabs)
Definition OracularSubsettedSlabCache.hpp:193
OracularSubsettedSlabCache & operator=(const OracularSubsettedSlabCache &)=delete
size_t get_max_slabs() const
Definition OracularSubsettedSlabCache.hpp:416
size_t get_num_slabs() const
Definition OracularSubsettedSlabCache.hpp:423
std::pair< const Slab_ *, Index_ > next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate)
Definition OracularSubsettedSlabCache.hpp:273
OracularSubsettedSlabCache(const OracularSubsettedSlabCache &)=delete
Index_ next()
Definition OracularSubsettedSlabCache.hpp:241
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:4
OracularSubsettedSlabCacheSelectionType
Definition OracularSubsettedSlabCache.hpp:24
Statistics for regular chunks along a dimension.
Definition ChunkDimensionStats.hpp:35
ChunkDimensionStats()
Definition ChunkDimensionStats.hpp:50
Details on the subset to extract in OracularSubsettedSlabCache.
Definition OracularSubsettedSlabCache.hpp:31
Index_ block_end
Definition OracularSubsettedSlabCache.hpp:57
Index_ block_length
Definition OracularSubsettedSlabCache.hpp:50
std::vector< Index_ > indices
Definition OracularSubsettedSlabCache.hpp:67
OracularSubsettedSlabCacheSelectionType selection
Definition OracularSubsettedSlabCache.hpp:35
Index_ block_start
Definition OracularSubsettedSlabCache.hpp:44
std::unordered_map< Index_, Index_ > mapping
Definition OracularSubsettedSlabCache.hpp:74