tatami_chunked
Helpers to create custom chunked tatami matrices
OracularSlabCache.hpp
#ifndef TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP
#define TATAMI_CHUNKED_ORACULAR_SLAB_CACHE_HPP

#include <unordered_map>
#include <vector>
#include <list>
#include <type_traits>
#include <memory>
#include <cstddef>

#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

namespace tatami_chunked {

// Oracular-aware cache for slabs, where slab eviction and re-use are
// scheduled from the predictions of a tatami::Oracle.
template<typename Id_, typename Index_, class Slab_, bool track_reuse_ = false>
class OracularSlabCache {
private:
    std::shared_ptr<const tatami::Oracle<Index_> > my_oracle;
    tatami::PredictionIndex my_total;
    tatami::PredictionIndex my_counter = 0;

    Index_ my_last_slab_id = 0;
    Slab_* my_last_slab = NULL;

    typedef std::vector<Slab_> SlabPool;
    typename SlabPool::size_type my_max_slabs;
    SlabPool my_all_slabs;

    std::unordered_map<Id_, Slab_*> my_current_cache, my_future_cache;
    std::vector<std::pair<Id_, Slab_*> > my_to_populate;
    std::vector<Id_> my_in_need;
    tatami::PredictionIndex my_refresh_point = 0;

    typename std::conditional<track_reuse_, std::vector<std::pair<Id_, Slab_*> >, bool>::type my_to_reuse;

public:
    template<typename MaxSlabs_>
    OracularSlabCache(std::shared_ptr<const tatami::Oracle<Index_> > oracle, MaxSlabs_ max_slabs) :
        my_oracle(std::move(oracle)),
        my_total(my_oracle->total()),
        my_max_slabs(sanisizer::cast<decltype(my_max_slabs)>(max_slabs))
    {
        my_all_slabs.reserve(max_slabs);
        my_current_cache.reserve(max_slabs);
        my_future_cache.reserve(max_slabs);
    }

    // Copying is deleted, as the cached Slab_* pointers would still refer to
    // the original object's my_all_slabs after a copy.
    OracularSlabCache(const OracularSlabCache&) = delete;
    OracularSlabCache& operator=(const OracularSlabCache&) = delete;

    // Move operators are still okay as pointers still point to the moved vectors,
    // see https://stackoverflow.com/questions/43988553/stdvector-stdmove-and-pointer-invalidation.
    OracularSlabCache(OracularSlabCache&&) = default;
    OracularSlabCache& operator=(OracularSlabCache&&) = default;

    // Might as well define this.
    ~OracularSlabCache() = default;

public:
    // Index of the next element to be accessed, as predicted by the oracle.
    Index_ next() {
        return my_oracle->get(my_counter++);
    }

public:
    // Fetch the slab containing the next predicted element. 'identify' converts an element index
    // into a pair of (slab identifier, offset of the element within that slab); 'create' returns a
    // new default-initialized Slab_; 'populate' fills the requested slabs with their contents.
    // The return value is a pointer to the relevant slab plus the offset of the element within it.
    template<class Ifunction_, class Cfunction_, class Pfunction_>
    std::pair<const Slab_*, Index_> next(Ifunction_ identify, Cfunction_ create, Pfunction_ populate) {
        Index_ index = this->next();
        auto slab_info = identify(index);
        if (slab_info.first == my_last_slab_id && my_last_slab) {
            return std::make_pair(my_last_slab, slab_info.second);
        }
        my_last_slab_id = slab_info.first;

        // Updating the cache if we hit the refresh point.
        if (my_counter - 1 == my_refresh_point) {
            // Note that, for any given populate cycle, the first prediction's
            // slab cannot already be in the cache, otherwise it would have been
            // incorporated into the previous cycle. So we can skip some code.
            my_future_cache[slab_info.first] = NULL;
            my_in_need.push_back(slab_info.first);
            decltype(my_max_slabs) used_slabs = 1;
            auto last_future_slab_id = slab_info.first;

            while (++my_refresh_point < my_total) {
                auto future_index = my_oracle->get(my_refresh_point);
                auto future_slab_info = identify(future_index);
                if (last_future_slab_id == future_slab_info.first) {
                    continue;
                }

                last_future_slab_id = future_slab_info.first;
                if (my_future_cache.find(future_slab_info.first) != my_future_cache.end()) {
                    continue;
                }

                if (used_slabs == my_max_slabs) {
                    break;
                }
                ++used_slabs;

                auto ccIt = my_current_cache.find(future_slab_info.first);
                if (ccIt == my_current_cache.end()) {
                    my_future_cache[future_slab_info.first] = NULL;
                    my_in_need.push_back(future_slab_info.first);

                } else {
                    auto slab_ptr = ccIt->second;
                    my_future_cache[future_slab_info.first] = slab_ptr;
                    my_current_cache.erase(ccIt);
                    if constexpr(track_reuse_) {
                        my_to_reuse.emplace_back(future_slab_info.first, slab_ptr);
                    }
                }
            }

            auto cIt = my_current_cache.begin();
            for (auto a : my_in_need) {
                if (cIt != my_current_cache.end()) {
                    my_to_populate.emplace_back(a, cIt->second);
                    my_future_cache[a] = cIt->second;
                    ++cIt;
                } else {
                    // We reserved my_all_slabs, so further push_back() calls should not
                    // trigger any reallocation or invalidation of the pointers.
                    my_all_slabs.push_back(create());
                    auto slab_ptr = &(my_all_slabs.back());
                    my_to_populate.emplace_back(a, slab_ptr);
                    my_future_cache[a] = slab_ptr;
                }
            }
            my_in_need.clear();

            if constexpr(track_reuse_) {
                populate(my_to_populate, my_to_reuse);
            } else {
                populate(my_to_populate);
            }

            my_to_populate.clear();
            if constexpr(track_reuse_) {
                my_to_reuse.clear();
            }

            // We always fill my_future_cache to the brim, so every entry of
            // my_all_slabs should be referenced by a pointer in
            // my_future_cache. There shouldn't be any free cache entries
            // remaining in my_current_cache, i.e., at this point, cIt should
            // equal my_current_cache.end(), as we transferred everything to
            // my_future_cache. Thus it is safe to clear my_current_cache
            // without worrying about leaking memory. The only exception is if
            // we run out of predictions, in which case it doesn't matter.
            my_current_cache.clear();
            my_current_cache.swap(my_future_cache);
        }

        // We know it must exist, so no need to check ccIt's validity.
        auto ccIt = my_current_cache.find(slab_info.first);
        my_last_slab = ccIt->second;
        return std::make_pair(my_last_slab, slab_info.second);
    }

public:
    // Maximum number of slabs that can be held in the cache.
    auto get_max_slabs() const {
        return my_max_slabs;
    }

    // Number of slabs currently held in the cache.
    auto get_num_slabs() const {
        return my_current_cache.size();
    }
};

}

#endif
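
For reference, here is a minimal sketch of how an extractor for a chunked matrix might drive this cache. The Slab struct, the single-column matrix, the 100-row block size, the read_slab_from_disk() helper and the fetch_value() wrapper are all hypothetical stand-ins for whatever storage backend is being wrapped; only the OracularSlabCache calls reflect the interface defined in the header above, and the include path assumes the usual tatami_chunked layout.

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "tatami/tatami.hpp"
#include "tatami_chunked/OracularSlabCache.hpp"

// Hypothetical slab: the contents of 100 consecutive rows of a single-column matrix.
struct Slab {
    std::vector<double> values;
};

// Hypothetical stand-in for a real chunk reader; fills the slab for the 'id'-th block of rows.
void read_slab_from_disk(int id, Slab& slab) {
    std::fill(slab.values.begin(), slab.values.end(), static_cast<double>(id));
}

double fetch_value(std::shared_ptr<const tatami::Oracle<int> > oracle) {
    // Cache up to 10 slabs at a time; slab identifiers and element indices are both plain ints here.
    tatami_chunked::OracularSlabCache<int, int, Slab> cache(std::move(oracle), 10);

    auto res = cache.next(
        /* identify = */ [](int row) {
            return std::make_pair(row / 100, row % 100); // slab id, offset of 'row' within that slab.
        },
        /* create = */ []() {
            Slab slab;
            slab.values.resize(100);
            return slab;
        },
        /* populate = */ [](std::vector<std::pair<int, Slab*> >& needed) {
            // Only slabs that are not already cached (or that were evicted) need to be re-read.
            for (auto& p : needed) {
                read_slab_from_disk(p.first, *(p.second));
            }
        }
    );

    // 'res.first' is the cached slab; 'res.second' is the offset of the requested row within it.
    return res.first->values[res.second];
}

As the if constexpr branches above show, setting track_reuse_ to true changes the populate signature: the callback then receives a second vector of (identifier, pointer) pairs listing the slabs carried over from the previous cycle, in addition to the vector of slabs that need to be filled.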