tatami_chunked
Helpers to create custom chunked tatami matrices
OracularSlabCache.hpp
#ifndef TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP
#define TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP

#include <unordered_map>
#include <vector>
#include <list>
#include <type_traits>
#include <memory>

#include "tatami/tatami.hpp"

namespace tatami_chunked {

/**
 * Oracular-aware cache for slabs.
 *
 * @tparam Id_ Type of the slab identifier.
 * @tparam Index_ Integer type of the predicted element index.
 * @tparam Slab_ Class of the slab.
 * @tparam track_reuse_ Whether to track slabs that are re-used across populate cycles.
 */
template<typename Id_, typename Index_, class Slab_, bool track_reuse_ = false>
class OracularSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    size_t my_total;
    size_t my_counter = 0;

    Index_ my_last_slab_id = 0;
    Slab_* my_last_slab = NULL;

    size_t my_max_slabs;
    std::vector<Slab_> my_all_slabs;
    std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;
    std::vector<std::pair<Id_, Slab_*> > my_to_populate;
    std::vector<Id_> my_in_need;
    size_t my_refresh_point = 0;

    typename std::conditional<track_reuse_, std::vector<std::pair<Id_, Slab_*> >, bool>::type my_to_reuse;

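    // Design note: my_all_slabs owns all slab instances, while the maps and
    // my_last_slab only hold pointers into it. On each refresh cycle, slabs
    // that will be needed again are transferred from my_current_cache to
    // my_future_cache; any remaining my_current_cache entries are recycled
    // for the new slab identifiers collected in my_in_need.
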
public:
    /**
     * @param oracle Oracle specifying the sequence of future access requests.
     * @param max_slabs Maximum number of slabs to hold in the cache.
     */
    OracularSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, size_t max_slabs) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_slabs(max_slabs)
    {
        my_all_slabs.reserve(max_slabs);
        my_current_cache.reserve(max_slabs);
        my_future_cache.reserve(max_slabs);
    }

    // Copy construction/assignment is deleted, as the cache holds pointers
    // into my_all_slabs that a copy would not preserve.
    OracularSlabCache(const OracularSlabCache&) = delete;
    OracularSlabCache& operator=(const OracularSlabCache&) = delete;

    // Move operators are still okay as pointers still point into the moved vectors,
    // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
    OracularSlabCache(OracularSlabCache&&) = default;
    OracularSlabCache& operator=(OracularSlabCache&&) = default;

    // Might as well define this.
    ~OracularSlabCache() = default;
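
    // For illustration (MySlab being any slab type), moving is safe:
    //     OracularSlabCache<int, int, MySlab> cache(oracle, 10);
    //     auto moved = std::move(cache); // pointers inside 'moved' remain valid.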

public:
    /**
     * @return The index of the next element to be accessed, as predicted by the oracle.
     */
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
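    /**
     * Fetch the next slab, creating and populating it if it is not already cached.
     *
     * @param identify Function that accepts an element index and returns a pair
     * containing (i) the identifier of the slab containing that element and
     * (ii) the index of the element within that slab.
     * @param create Function that returns a newly constructed Slab_.
     * @param populate Function that accepts a vector of (slab identifier,
     * slab pointer) pairs and fills each slab with its contents; if
     * track_reuse_ = true, it also accepts a second vector of pairs for slabs
     * that were re-used from the previous cycle.
     *
     * @return Pair containing a pointer to the slab for the next predicted
     * element, and the index of that element within the slab.
     */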
    template<class Ifunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);
        if (slab_info.first == my_last_slab_id && my_last_slab) {
            return std::make_pair(my_last_slab, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Update the cache if we hit the refresh point.
        if (my_counter - 1 == my_refresh_point) {
            // Note that, for any given populate cycle, the first prediction's
            // slab cannot already be in the cache, otherwise it would have
            // been incorporated into the previous cycle. So we can skip some code.
            my_future_cache[slab_info.first] = NULL;
            my_in_need.push_back(slab_info.first);
            size_t used_slabs = 1;
            auto last_future_slab_id = slab_info.first;

            while (++my_refresh_point < my_total) {
                auto future_index = my_oracle->get(my_refresh_point);
                auto future_slab_info = identify(future_index);
                if (future_slab_info.first == last_future_slab_id) {
                    continue;
                }

                last_future_slab_id = future_slab_info.first;
                if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
                    continue;
                }

                if (used_slabs == my_max_slabs) {
                    break;
                }
                ++used_slabs;

                auto ccIt = my_current_cache.find(future_slab_info.first);
                if (ccIt == my_current_cache.end()) {
                    my_future_cache[future_slab_info.first] = NULL;
                    my_in_need.push_back(future_slab_info.first);

                } else {
                    auto slab_ptr = ccIt->second;
                    my_future_cache[future_slab_info.first] = slab_ptr;
                    my_current_cache.erase(ccIt);
                    if constexpr(track_reuse_) {
                        my_to_reuse.emplace_back(future_slab_info.first, slab_ptr);
                    }
                }
            }

            auto cIt = my_current_cache.begin();
            for (auto a : my_in_need) {
                if (cIt != my_current_cache.end()) {
                    my_to_populate.emplace_back(a, cIt->second);
                    my_future_cache[a] = cIt->second;
                    ++cIt;
                } else {
                    // We reserved my_all_slabs, so further push_back() calls should
                    // not trigger any reallocation or invalidation of the pointers.
                    my_all_slabs.push_back(create());
                    auto slab_ptr = &(my_all_slabs.back());
                    my_to_populate.emplace_back(a, slab_ptr);
                    my_future_cache[a] = slab_ptr;
                }
            }
            my_in_need.clear();

            if constexpr(track_reuse_) {
                populate(my_to_populate, my_to_reuse);
            } else {
                populate(my_to_populate);
            }

            my_to_populate.clear();
            if constexpr(track_reuse_) {
                my_to_reuse.clear();
            }

            // We always fill my_future_cache to the brim, so every entry of
            // my_all_slabs should be referenced by a pointer in my_future_cache.
            // There shouldn't be any free cache entries remaining in
            // my_current_cache, i.e., at this point, cIt should equal
            // my_current_cache.end(), as we transferred everything to
            // my_future_cache. Thus it is safe to clear my_current_cache
            // without worrying about leaking memory. The only exception is if
            // we run out of predictions, in which case it doesn't matter.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab = ccIt->second;
        return std::make_pair(my_last_slab, slab_info.second);
    }

public:
    /**
     * @return Maximum number of slabs in the cache.
     */
    size_t get_max_slabs() const {
        return my_max_slabs;
    }

    /**
     * @return Number of slabs currently in the cache.
     */
    size_t get_num_slabs() const {
        return my_current_cache.size();
    }
};

}

#endif
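
For reference, a minimal usage sketch is shown below. The MySlab payload type, the load_slab() helper, the slab_extent chunking and the include path are all hypothetical stand-ins for an application's own chunked storage; the oracle is built from tatami::FixedVectorOracle, assuming the usual tatami definition that wraps a fixed vector of predicted indices.

// Minimal usage sketch; MySlab, load_slab() and slab_extent are hypothetical.
#include "tatami_chunked/OracularSlabCache.hpp"
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct MySlab {
    std::vector<double> values;
};

// Hypothetical loader that fills a slab with the contents of chunk 'id'.
void load_slab(int id, MySlab& slab, int slab_extent) {
    slab.values.assign(slab_extent, static_cast<double>(id));
}

int main() {
    const int slab_extent = 100; // each slab covers 100 consecutive elements.

    // Fixed sequence of predicted element accesses.
    std::vector<int> predictions { 0, 1, 2, 250, 251, 42, 43, 999 };
    auto oracle = std::make_shared<tatami::FixedVectorOracle<int> >(std::move(predictions));
    size_t total = oracle->total();

    tatami_chunked::OracularSlabCache<int, int, MySlab> cache(std::move(oracle), /* max_slabs = */ 2);

    for (size_t i = 0; i < total; ++i) {
        auto res = cache.next(
            /* identify = */ [&](int x) { return std::make_pair(x / slab_extent, x % slab_extent); },
            /* create = */ [&]() { return MySlab(); },
            /* populate = */ [&](std::vector<std::pair<int, MySlab*> >& to_populate) {
                for (auto& p : to_populate) {
                    load_slab(p.first, *p.second, slab_extent);
                }
            }
        );
        std::cout << res.first->values[res.second] << "\n"; // value for the predicted element.
    }

    return 0;
}

Note that populate() receives only the slabs that actually need (re)loading in each cycle; slabs carried over from the previous cycle are reused without any further I/O.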