tatami_chunked
Helpers to create custom chunked tatami matrices
OracularSlabCache.hpp
#ifndef TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP
#define TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP

#include "utils.hpp"

#include <unordered_map>
#include <vector>
#include <list>
#include <type_traits>
#include <memory>
#include <cstddef>

#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

namespace tatami_chunked {

// Oracle-aware cache for slabs.
template<typename Id_, typename Index_, class Slab_, bool track_reuse_ = false>
class OracularSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    tatami::PredictionIndex my_total;
    tatami::PredictionIndex my_counter = 0;

    Id_ my_last_slab_id = 0;
    Slab_* my_last_slab = NULL;

    typedef std::vector<Slab_> SlabPool;
    typename SlabPool::size_type my_max_slabs;
    SlabPool my_all_slabs;

    std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;
    std::vector<std::pair<Id_, Slab_*> > my_to_populate;
    std::vector<Id_> my_in_need;
    tatami::PredictionIndex my_refresh_point = 0;

    typename std::conditional<track_reuse_, std::vector<std::pair<Id_, Slab_*> >, bool>::type my_to_reuse;
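
    // Design note: the cache operates in cycles. my_current_cache holds the slabs
    // needed for all predictions up to my_refresh_point; once the counter reaches
    // that point, the upcoming predictions are scanned to assemble my_future_cache
    // (re-using slabs from my_current_cache where possible), which is then swapped
    // in to become the new current cache.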

public:
    template<typename MaxSlabs_>
    OracularSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, MaxSlabs_ max_slabs) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_slabs(sanisizer::cast<I<decltype(my_max_slabs)> >(max_slabs))
    {
        my_all_slabs.reserve(max_slabs);
        my_current_cache.reserve(max_slabs);
        my_future_cache.reserve(max_slabs);
    }

    OracularSlabCache(const OracularSlabCache&) = delete;
    OracularSlabCache& operator=(const OracularSlabCache&) = delete;

    // Move operators are still okay as pointers still point to the moved vectors,
    // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
    OracularSlabCache(OracularSlabCache&&) = default;
    OracularSlabCache& operator=(OracularSlabCache&&) = default;

    // Might as well define this.
    ~OracularSlabCache() = default;

public:
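    // Return the next prediction from the oracle, advancing the internal counter
    // without consulting the cache.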
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
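    // The identify, create and populate arguments are caller-supplied callables:
    // - identify(i) maps a predicted element index to a (slab ID, offset-in-slab) pair;
    // - create() constructs a new empty Slab_ to be owned by the cache;
    // - populate(to_populate) fills each (slab ID, Slab_*) pair in 'to_populate' with
    //   that slab's contents. If track_reuse_ is true, populate() also receives a second
    //   vector listing the (slab ID, Slab_*) pairs retained from the previous cycle.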
    template<class Ifunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);
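        // Quick return if this prediction lands in the same slab as the previous call,
        // in which case my_last_slab is still valid and can be returned directly.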
        if (slab_info.first == my_last_slab_id && my_last_slab) {
            return std::make_pair(my_last_slab, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Update the cache if we hit the refresh point.
        if (my_counter - 1 == my_refresh_point) {
            // Note that, for any given populate cycle, the first prediction's
            // slab cannot already be in the cache, otherwise it would have been
            // incorporated into the previous cycle. So we can skip some code.
            my_future_cache[slab_info.first] = NULL;
            my_in_need.push_back(slab_info.first);
            I<decltype(my_max_slabs)> used_slabs = 1;
            auto last_future_slab_id = slab_info.first;
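
            // Look ahead through the oracle's upcoming predictions, earmarking each
            // distinct slab for the next cycle until we run out of predictions or cache space.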
            while (++my_refresh_point < my_total) {
                auto future_index = my_oracle->get(my_refresh_point);
                auto future_slab_info = identify(future_index);
                if (last_future_slab_id == future_slab_info.first) {
                    continue;
                }

                last_future_slab_id = future_slab_info.first;
                if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
                    continue;
                }
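
                // Stop looking ahead once we have already earmarked as many distinct
                // slabs as the cache can hold.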
                if (used_slabs == my_max_slabs) {
                    break;
                }
                ++used_slabs;
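
                // If the slab is already resident from the current cycle, recycle it
                // directly; otherwise it is flagged for creation/population below.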
                auto ccIt = my_current_cache.find(future_slab_info.first);
                if (ccIt == my_current_cache.end()) {
                    my_future_cache[future_slab_info.first] = NULL;
                    my_in_need.push_back(future_slab_info.first);

                } else {
                    auto slab_ptr = ccIt->second;
                    my_future_cache[future_slab_info.first] = slab_ptr;
                    my_current_cache.erase(ccIt);
                    if constexpr(track_reuse_) {
                        my_to_reuse.emplace_back(future_slab_info.first, slab_ptr);
                    }
                }
            }
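
            // Assign storage to the slabs that still need population, preferring leftover
            // slabs from the current cycle before allocating new ones via create().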
            auto cIt = my_current_cache.begin();
            for (auto a : my_in_need) {
                if (cIt != my_current_cache.end()) {
                    my_to_populate.emplace_back(a, cIt->second);
                    my_future_cache[a] = cIt->second;
                    ++cIt;
                } else {
                    // We reserved my_all_slabs, so further push_back() calls should not
                    // trigger any reallocation or invalidation of the pointers.
                    my_all_slabs.push_back(create());
                    auto slab_ptr = &(my_all_slabs.back());
                    my_to_populate.emplace_back(a, slab_ptr);
                    my_future_cache[a] = slab_ptr;
                }
            }
            my_in_need.clear();
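
            // Let the caller fill the newly assigned slabs with data; with track_reuse_,
            // the slabs recycled from the previous cycle are reported as well.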
            if constexpr(track_reuse_) {
                populate(my_to_populate, my_to_reuse);
            } else {
                populate(my_to_populate);
            }

            my_to_populate.clear();
            if constexpr(track_reuse_) {
                my_to_reuse.clear();
            }

            // We always fill my_future_cache to the brim, so every entry of
            // my_all_slabs should be referenced by a pointer in
            // my_future_cache. There shouldn't be any free cache entries
            // remaining in my_current_cache, i.e., at this point, cIt should
            // equal my_current_cache.end(), as we transferred everything to
            // my_future_cache. Thus it is safe to clear my_current_cache
            // without worrying about leaking memory. The only exception is if
            // we run out of predictions, in which case it doesn't matter.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab = ccIt->second;
        return std::make_pair(my_last_slab, slab_info.second);
    }

public:
    auto get_max_slabs() const {
        return my_max_slabs;
    }

    auto get_num_slabs() const {
        return my_current_cache.size();
    }
};

}

#endif
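
For illustration, a minimal usage sketch follows; it is not part of this header. The slab type MySlab, the uniform chunk_length, the column count ncol and the body of the populate callback are hypothetical placeholders for application code, and only the OracularSlabCache interface shown above is assumed.

#include "tatami_chunked/OracularSlabCache.hpp"
#include "tatami/tatami.hpp"
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical slab holding one chunk's worth of dense row data.
struct MySlab {
    std::vector<double> buffer;
};

void example(std::shared_ptr<const tatami::Oracle<int> > oracle, int chunk_length, int ncol) {
    // Cache up to 10 slabs, driven by the supplied oracle of row predictions.
    tatami_chunked::OracularSlabCache<int, int, MySlab> cache(std::move(oracle), 10);

    auto res = cache.next(
        // identify: map a predicted row to its chunk ID and offset within that chunk.
        [&](int row) -> std::pair<int, int> {
            return std::make_pair(row / chunk_length, row % chunk_length);
        },
        // create: allocate an empty slab to be owned by the cache.
        [&]() -> MySlab {
            MySlab slab;
            slab.buffer.resize(static_cast<std::size_t>(chunk_length) * ncol);
            return slab;
        },
        // populate: fill each requested (chunk ID, slab) pair from the underlying store.
        [&](std::vector<std::pair<int, MySlab*> >& to_populate) {
            for (auto& p : to_populate) {
                // ... read chunk 'p.first' into p.second->buffer ...
            }
        }
    );

    const MySlab* slab = res.first; // cached slab containing the predicted row
    int offset = res.second;        // position of that row within the slab
    // ... use slab->buffer, e.g., starting at offset * ncol ...
}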