tatami_chunked
Helpers to create custom chunked tatami matrices
Loading...
Searching...
No Matches
OracularVariableSlabCache.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CHUNKED_ORACULAR_VARIABLE_SLAB_CACHE_HPP
2#define TATAMI_CHUNKED_ORACULAR_VARIABLE_SLAB_CACHE_HPP
3
4#include <unordered_map>
5#include <vector>
6#include <list>
7#include <type_traits>
8#include "tatami/tatami.hpp"
9
15namespace tatami_chunked {
16
42template<typename Id_, typename Index_, class Slab_, typename Size_>
44private:
45 std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
46 size_t my_total;
47 size_t my_counter = 0;
48
49 Index_ my_last_slab_id = 0;
50 size_t my_last_slab_num = -1;
51
52 Size_ my_max_size, my_used_size = 0;
53 std::vector<Slab_> my_all_slabs;
54
55 // We need to hold an offset into 'my_all_slabs' rather than a pointer, as
56 // 'my_all_slabs' might be reallocated upon addition of new slabs, given that
57 // we don't know the maximum number of slabs ahead of time.
58 std::unordered_map<Id_, size_t> my_current_cache, my_future_cache;
59 std::vector<std::pair<Id_, size_t> > my_to_populate, my_to_reuse;
60 std::vector<Id_> my_in_need;
61 std::vector<size_t> my_free_pool;
62 size_t my_refresh_point = 0;
63
64public:
71 my_oracle(std::move(oracle)),
72 my_total(my_oracle->total()),
73 my_max_size(max_size)
74 {}
75
80
85
89 // Move operators are still okay as pointers still point to the moved vectors.
90 // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
93
94 // Might as well define this.
100public:
108 return my_oracle->get(my_counter++);
109 }
110
111public:
156 template<class Ifunction_, class Ufunction_, class Afunction_, class Cfunction_, class Pfunction_>
158 Index_ index = this->next();
159 auto slab_info = identify(index);
160 if (slab_info.first == my_last_slab_id && my_last_slab_num != static_cast<size_t>(-1)) {
161 return std::make_pair(my_all_slabs.data() + my_last_slab_num, slab_info.second);
162 }
163 my_last_slab_id = slab_info.first;
164
165 // Updating the cache if we hit the refresh point.
166 if (my_counter - 1 == my_refresh_point) {
167 // Note that, for any given populate cycle, the first prediction's
168 // slab cannot already be in the cache, otherwise it would have
169 // incorporated into the previous cycle. So we can skip some code.
170 my_used_size = upper_size(slab_info.first);
171 requisition_new_slab(slab_info.first);
172
173 auto last_future_slab_id = slab_info.first;
174 while (++my_refresh_point < my_total) {
175 auto future_index = my_oracle->get(my_refresh_point);
178 continue;
179 }
180
182 if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
183 continue;
184 }
185
186 auto ccIt = my_current_cache.find(future_slab_info.first);
187 if (ccIt != my_current_cache.end()) {
188 size_t slab_num = ccIt->second;
189 auto candidate = my_used_size + actual_size(future_slab_info.first, my_all_slabs[slab_num]);
190 if (candidate > my_max_size) {
191 break;
192 }
193 my_used_size = candidate;
194 my_future_cache[future_slab_info.first] = slab_num;
195 my_to_reuse.emplace_back(future_slab_info.first, slab_num);
196 my_current_cache.erase(ccIt);
197 } else {
198 auto candidate = my_used_size + upper_size(future_slab_info.first);
199 if (candidate > my_max_size) {
200 break;
201 }
202 my_used_size = candidate;
203 requisition_new_slab(future_slab_info.first);
204 }
205 }
206
207 auto cIt = my_current_cache.begin();
208 for (auto a : my_in_need) {
209 if (cIt != my_current_cache.end()) {
210 size_t slab_num = cIt->second;
211 my_to_populate.emplace_back(a, slab_num);
212 my_future_cache[a] = slab_num;
213 ++cIt;
214 } else {
215 size_t slab_num = my_all_slabs.size();
216 my_all_slabs.push_back(create());
217 my_to_populate.emplace_back(a, slab_num);
218 my_future_cache[a] = slab_num;
219 }
220 }
221 my_in_need.clear();
222
223 for (; cIt != my_current_cache.end(); ++cIt) {
224 my_free_pool.emplace_back(cIt->second);
225 }
226
227 populate(my_to_populate, my_to_reuse, my_all_slabs);
228 my_to_populate.clear();
229 my_to_reuse.clear();
230
231 my_current_cache.clear();
232 my_current_cache.swap(my_future_cache);
233 }
234
235 // We know it must exist, so no need to check ccIt's validity.
236 auto ccIt = my_current_cache.find(slab_info.first);
237 my_last_slab_num = ccIt->second;
238 return std::make_pair(my_all_slabs.data() + my_last_slab_num, slab_info.second);
239 }
240
241private:
242 void requisition_new_slab(Id_ slab_id) {
243 if (!my_free_pool.empty()) {
244 auto slab_num = my_free_pool.back();
245 my_future_cache[slab_id] = slab_num;
246 my_free_pool.pop_back();
247 my_to_populate.emplace_back(slab_id, slab_num);
248 } else {
249 my_future_cache[slab_id] = 0;
250 my_in_need.push_back(slab_id);
251 }
252 }
253
254public:
259 size_t get_max_size() const {
260 return my_max_size;
261 }
262
267 size_t get_used_size() const {
268 return my_used_size;
269 }
270
274 size_t get_num_slabs() const {
275 return my_current_cache.size();
276 }
277};
278
279}
280
281#endif
Oracle-aware cache for variable-size slabs.
Definition OracularVariableSlabCache.hpp:43
OracularVariableSlabCache(std::shared_ptr< const tatami::Oracle< Index_ > > oracle, size_t max_size)
Definition OracularVariableSlabCache.hpp:70
size_t get_num_slabs() const
Definition OracularVariableSlabCache.hpp:274
size_t get_used_size() const
Definition OracularVariableSlabCache.hpp:267
std::pair< const Slab_ *, Index_ > next(Ifunction_ identify, Ufunction_ upper_size, Afunction_ actual_size, Cfunction_ create, Pfunction_ populate)
Definition OracularVariableSlabCache.hpp:157
OracularVariableSlabCache(const OracularVariableSlabCache &)=delete
OracularVariableSlabCache & operator=(const OracularVariableSlabCache &)=delete
size_t get_max_size() const
Definition OracularVariableSlabCache.hpp:259
Index_ next()
Definition OracularVariableSlabCache.hpp:107
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:4
Statistics for regular chunks along a dimension.
Definition ChunkDimensionStats.hpp:35