tatami_chunked
Helpers to create custom chunked tatami matrices
Loading...
Searching...
No Matches
CustomDenseChunkedMatrix.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CHUNKED_CUSTOM_DENSE_CHUNKED_MATRIX_HPP
2#define TATAMI_CHUNKED_CUSTOM_DENSE_CHUNKED_MATRIX_HPP
3
4#include "tatami/tatami.hpp"
5#include "custom_internals.hpp"
6#include "SlabCacheStats.hpp"
8#include "LruSlabCache.hpp"
11
12#include <type_traits>
13#include <vector>
14
20namespace tatami_chunked {
21
31 size_t maximum_cache_size = 100000000;
32
39};
40
44namespace CustomChunkedMatrix_internal {
45
46/*********************
47 **** Base classes ***
48 *********************/
49
50template<bool oracle_, typename Value_, typename Index_, typename Chunk_>
51class SoloDenseCore {
52private:
53 const ChunkCoordinator<Index_, false, Chunk_>& my_coordinator;
54 typename Chunk_::Workspace my_chunk_workspace;
55
57 typename std::conditional<oracle_, size_t, bool>::type my_counter = 0;
58
60 typedef typename decltype(my_factory)::Slab Slab;
61
62 // These two instances are not fully allocated Slabs; rather, tmp_solo just
63 // holds the content for a single chunk, while final_solo holds the content
64 // across chunks but only for the requested dimension element. Both cases
65 // are likely to be much smaller than a full Slab, so we're already more
66 // memory-efficient than 'require_minimum_cache = true'.
68 Slab my_final_solo;
69
70public:
73 [[maybe_unused]] const SlabCacheStats& slab_stats, // for consistency with the other base classes.
76 my_coordinator(coordinator),
77 my_oracle(std::move(oracle)),
79 my_tmp_solo(static_cast<size_t>(my_coordinator.get_chunk_nrow()) * static_cast<size_t>(my_coordinator.get_chunk_ncol())),
81 {}
82
83 template<typename ... Args_>
84 std::pair<const Slab*, Index_> fetch_raw(bool row, Index_ i, Args_&& ... args) {
85 if constexpr(oracle_) {
86 i = my_oracle->get(my_counter++);
87 }
88 return my_coordinator.fetch_single(row, i, std::forward<Args_>(args)..., my_chunk_workspace, my_tmp_solo, my_final_solo);
89 }
90};
91
92template<typename Value_, typename Index_, typename Chunk_>
93class MyopicDenseCore {
94private:
95 const ChunkCoordinator<Index_, false, Chunk_>& my_coordinator;
96 typename Chunk_::Workspace my_chunk_workspace;
97
98 DenseSlabFactory<typename Chunk_::value_type> my_factory;
99 typedef typename decltype(my_factory)::Slab Slab;
100
101 LruSlabCache<Index_, Slab> my_cache;
102
103public:
104 MyopicDenseCore(
105 const ChunkCoordinator<Index_, false, Chunk_>& coordinator,
106 const SlabCacheStats& slab_stats,
107 [[maybe_unused]] tatami::MaybeOracle<false, Index_> ora, // for consistency with the other base classes
108 [[maybe_unused]] Index_ secondary_length) :
109 my_coordinator(coordinator),
110 my_factory(slab_stats),
111 my_cache(slab_stats.max_slabs_in_cache)
112 {}
113
114 template<typename ... Args_>
115 std::pair<const Slab*, Index_> fetch_raw(bool row, Index_ i, Args_&& ... args) {
116 return my_coordinator.fetch_myopic(row, i, std::forward<Args_>(args)..., my_chunk_workspace, my_cache, my_factory);
117 }
118};
119
120template<typename Value_, typename Index_, typename Chunk_>
121class OracularDenseCore {
122private:
123 const ChunkCoordinator<Index_, false, Chunk_>& my_coordinator;
124 typename Chunk_::Workspace my_chunk_workspace;
125
126 DenseSlabFactory<typename Chunk_::value_type> my_factory;
127 typedef typename decltype(my_factory)::Slab Slab;
128
129 typename std::conditional<Chunk_::use_subset, OracularSubsettedSlabCache<Index_, Index_, Slab>, OracularSlabCache<Index_, Index_, Slab> >::type my_cache;
130
131public:
132 OracularDenseCore(
133 const ChunkCoordinator<Index_, false, Chunk_>& coordinator,
134 const SlabCacheStats& slab_stats,
136 [[maybe_unused]] Index_ secondary_length) :
137 my_coordinator(coordinator),
138 my_factory(slab_stats),
139 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache)
140 {}
141
142 template<typename ... Args_>
143 std::pair<const Slab*, Index_> fetch_raw(bool row, [[maybe_unused]] Index_ i, Args_&& ... args) {
144 return my_coordinator.fetch_oracular(row, std::forward<Args_>(args)..., my_chunk_workspace, my_cache, my_factory);
145 }
146};
147
148template<bool solo_, bool oracle_, typename Value_, typename Index_, typename Chunk_>
149using DenseCore = typename std::conditional<solo_,
150 SoloDenseCore<oracle_, Value_, Index_, Chunk_>,
151 typename std::conditional<oracle_,
152 OracularDenseCore<Value_, Index_, Chunk_>,
153 MyopicDenseCore<Value_, Index_, Chunk_>
154 >::type
155>::type;
156
157/***********************
158 **** Actual classes ***
159 ***********************/
160
// Copies one dimension element out of a fetched slab into 'buffer'.
//
// fetched: pair of (pointer to the slab, offset of the requested element within the slab).
// buffer: destination with space for at least 'secondary_length' values.
// secondary_length: number of values stored per dimension element in the slab.
//
// Returns 'buffer'.
template<class Slab_, typename Index_, typename Value_>
const Value_* process_dense_slab(const std::pair<const Slab_*, Index_>& fetched, Value_* buffer, size_t secondary_length) {
    // The offset is widened to size_t before multiplying to avoid overflow.
    const size_t start = static_cast<size_t>(fetched.second) * secondary_length;
    auto source = fetched.first->data + start;
    std::copy(source, source + secondary_length, buffer);
    return buffer;
}
167
168template<bool solo_, bool oracle_, typename Value_, typename Index_, typename Chunk_>
169class DenseFull : public tatami::DenseExtractor<oracle_, Value_, Index_> {
170public:
171 DenseFull(
172 const ChunkCoordinator<Index_, false, Chunk_>& coordinator,
173 const SlabCacheStats& slab_stats,
174 bool row,
176 my_row(row),
177 my_secondary_dim(coordinator.get_secondary_dim(row)),
178 my_core(
179 coordinator,
180 slab_stats,
181 std::move(oracle),
182 my_secondary_dim
183 )
184 {}
185
186 const Value_* fetch(Index_ i, Value_* buffer) {
187 auto fetched = my_core.fetch_raw(my_row, i, 0, my_secondary_dim);
188 return process_dense_slab(fetched, buffer, my_secondary_dim);
189 }
190
191private:
192 bool my_row;
193 Index_ my_secondary_dim;
194 DenseCore<solo_, oracle_, Value_, Index_, Chunk_> my_core;
195};
196
197template<bool solo_, bool oracle_, typename Value_, typename Index_, typename Chunk_>
198class DenseBlock : public tatami::DenseExtractor<oracle_, Value_, Index_> {
199public:
200 DenseBlock(
201 const ChunkCoordinator<Index_, false, Chunk_>& coordinator,
202 const SlabCacheStats& slab_stats,
203 bool row,
205 Index_ block_start,
206 Index_ block_length) :
207 my_row(row),
208 my_block_start(block_start),
209 my_block_length(block_length),
210 my_core(
211 coordinator,
212 slab_stats,
213 std::move(ora),
214 block_length
215 )
216 {}
217
218 const Value_* fetch(Index_ i, Value_* buffer) {
219 auto fetched = my_core.fetch_raw(my_row, i, my_block_start, my_block_length);
220 return process_dense_slab(fetched, buffer, my_block_length);
221 }
222
223private:
224 bool my_row;
225 Index_ my_block_start, my_block_length;
226 DenseCore<solo_, oracle_, Value_, Index_, Chunk_> my_core;
227};
228
229template<bool solo_, bool oracle_, typename Value_, typename Index_, typename Chunk_>
230class DenseIndex : public tatami::DenseExtractor<oracle_, Value_, Index_> {
231public:
232 DenseIndex(
233 const ChunkCoordinator<Index_, false, Chunk_>& coordinator,
234 const SlabCacheStats& slab_stats,
235 bool row,
237 tatami::VectorPtr<Index_> indices_ptr) :
238 my_row(row),
239 my_indices_ptr(std::move(indices_ptr)),
240 my_core(
241 coordinator,
242 slab_stats,
243 std::move(oracle),
244 my_indices_ptr->size()
245 )
246 {}
247
248 const Value_* fetch(Index_ i, Value_* buffer) {
249 auto fetched = my_core.fetch_raw(my_row, i, *my_indices_ptr, my_tmp_indices);
250 return process_dense_slab(fetched, buffer, my_indices_ptr->size());
251 }
252
253private:
254 bool my_row;
255 tatami::VectorPtr<Index_> my_indices_ptr;
256 std::vector<Index_> my_tmp_indices;
257 DenseCore<solo_, oracle_, Value_, Index_, Chunk_> my_core;
258};
259
260}
279template<typename Value_, typename Index_, typename Chunk_>
280class CustomDenseChunkedMatrix : public tatami::Matrix<Value_, Index_> {
281public:
294 my_cache_size_in_elements(opt.maximum_cache_size / sizeof(typename Chunk_::value_type)),
295 my_require_minimum_cache(opt.require_minimum_cache)
296 {}
297
298private:
299 CustomChunkedMatrix_internal::ChunkCoordinator<Index_, false, Chunk_> my_coordinator;
300 size_t my_cache_size_in_elements;
301 bool my_require_minimum_cache;
302
303public:
304 Index_ nrow() const {
305 return my_coordinator.get_nrow();
306 }
307
308 Index_ ncol() const {
309 return my_coordinator.get_ncol();
310 }
311
312 bool prefer_rows() const {
313 return my_coordinator.prefer_rows_internal();
314 }
315
316 bool uses_oracle(bool) const {
317 return true;
318 }
319
320 double prefer_rows_proportion() const {
321 return static_cast<double>(my_coordinator.prefer_rows_internal());
322 }
323
324 bool is_sparse() const {
325 return false;
326 }
327
328 double is_sparse_proportion() const {
329 return 0;
330 }
331
332 using tatami::Matrix<Value_, Index_>::dense;
333
334 using tatami::Matrix<Value_, Index_>::sparse;
335
336 /********************
337 *** Myopic dense ***
338 ********************/
339private:
340 template<bool oracle_, template<bool, bool, typename, typename, class> class Extractor_, typename ... Args_>
341 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > raw_dense_internal(bool row, Index_ secondary_length, Args_&& ... args) const {
342 if (row) {
343 // Remember, the num_chunks_per_column is the number of slabs needed to divide up all the *rows* of the matrix.
344 SlabCacheStats stats(my_coordinator.get_chunk_nrow(), secondary_length, my_coordinator.get_num_chunks_per_column(), my_cache_size_in_elements, my_require_minimum_cache);
345 if (stats.max_slabs_in_cache > 0) {
346 return std::make_unique<Extractor_<false, oracle_, Value_, Index_, Chunk_> >(my_coordinator, stats, row, std::forward<Args_>(args)...);
347 } else {
348 return std::make_unique<Extractor_<true, oracle_, Value_, Index_, Chunk_> >(my_coordinator, stats, row, std::forward<Args_>(args)...);
349 }
350 } else {
351 // Remember, the num_chunks_per_row is the number of slabs needed to divide up all the *columns* of the matrix.
352 SlabCacheStats stats(my_coordinator.get_chunk_ncol(), secondary_length, my_coordinator.get_num_chunks_per_row(), my_cache_size_in_elements, my_require_minimum_cache);
353 if (stats.max_slabs_in_cache > 0) {
354 return std::make_unique<Extractor_<false, oracle_, Value_, Index_, Chunk_> >(my_coordinator, stats, row, std::forward<Args_>(args)...);
355 } else {
356 return std::make_unique<Extractor_<true, oracle_, Value_, Index_, Chunk_> >(my_coordinator, stats, row, std::forward<Args_>(args)...);
357 }
358 }
359 }
360
361 template<bool oracle_>
362 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > dense_internal(bool row, tatami::MaybeOracle<oracle_, Index_> oracle, const tatami::Options&) const {
363 auto secondary = (row ? my_coordinator.get_ncol() : my_coordinator.get_nrow());
364 return raw_dense_internal<oracle_, CustomChunkedMatrix_internal::DenseFull>(row, secondary, std::move(oracle));
365 }
366
367 template<bool oracle_>
368 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > dense_internal(
369 bool row,
371 Index_ block_start,
372 Index_ block_length,
373 const tatami::Options&)
374 const {
375 return raw_dense_internal<oracle_, CustomChunkedMatrix_internal::DenseBlock>(row, block_length, std::move(oracle), block_start, block_length);
376 }
377
378 template<bool oracle_>
379 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > dense_internal(
380 bool row,
382 tatami::VectorPtr<Index_> indices_ptr,
383 const tatami::Options&)
384 const {
385 auto num_indices = indices_ptr->size();
386 return raw_dense_internal<oracle_, CustomChunkedMatrix_internal::DenseIndex>(row, num_indices, std::move(oracle), std::move(indices_ptr));
387 }
388
389public:
390 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(bool row, const tatami::Options& opt) const {
391 return dense_internal<false>(row, false, opt);
392 }
393
394 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(bool row, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
395 return dense_internal<false>(row, false, block_start, block_length, opt);
396 }
397
398 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(bool row, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
399 return dense_internal<false>(row, false, std::move(indices_ptr), opt);
400 }
401
402 /**********************
403 *** Oracular dense ***
404 **********************/
405public:
406 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
407 bool row,
408 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
409 const tatami::Options& opt)
410 const {
411 return dense_internal<true>(row, std::move(oracle), opt);
412 }
413
414 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
415 bool row,
416 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
417 Index_ block_start,
418 Index_ block_length,
419 const tatami::Options& opt)
420 const {
421 return dense_internal<true>(row, std::move(oracle), block_start, block_length, opt);
422 }
423
424 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
425 bool row,
426 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
427 tatami::VectorPtr<Index_> indices_ptr,
428 const tatami::Options& opt)
429 const {
430 return dense_internal<true>(row, std::move(oracle), std::move(indices_ptr), opt);
431 }
432
433 /*********************
434 *** Myopic sparse ***
435 *********************/
436public:
437 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(bool row, const tatami::Options& opt) const {
438 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), my_coordinator.get_secondary_dim(row), opt);
439 }
440
441 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(bool row, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
442 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
443 }
444
445 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(bool row, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
446 auto d = dense(row, indices_ptr, opt);
447 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(std::move(d), std::move(indices_ptr), opt);
448 }
449
450 /***********************
451 *** Oracular sparse ***
452 ***********************/
453public:
454 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
455 bool row,
456 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
457 const tatami::Options& opt)
458 const {
459 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), opt), my_coordinator.get_secondary_dim(row), opt);
460 }
461
462 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
463 bool row,
464 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
465 Index_ block_start,
466 Index_ block_length,
467 const tatami::Options& opt)
468 const {
469 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), block_start, block_length, opt), block_start, block_length, opt);
470 }
471
472 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
473 bool row,
474 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
475 tatami::VectorPtr<Index_> indices_ptr,
476 const tatami::Options& opt)
477 const {
478 auto d = dense(row, std::move(oracle), indices_ptr, opt);
479 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(std::move(d), std::move(indices_ptr), opt);
480 }
481};
482
483}
484
485#endif
Factory for dense slabs.
Create an LRU cache of slabs.
Create an oracle-aware cache for slabs.
Create an oracle-aware cache with subsets.
Slab cache statistics.
Matrix of custom dense chunks.
Definition CustomDenseChunkedMatrix.hpp:280
CustomDenseChunkedMatrix(Index_ mat_nrow, Index_ mat_ncol, Index_ chunk_nrow, Index_ chunk_ncol, std::vector< Chunk_ > chunks, bool row_major, const CustomDenseChunkedMatrixOptions &opt)
Definition CustomDenseChunkedMatrix.hpp:292
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:4
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
std::shared_ptr< const std::vector< Index_ > > VectorPtr
Statistics for regular chunks along a dimension.
Definition ChunkDimensionStats.hpp:35
Options for data extraction from a CustomDenseChunkedMatrix.
Definition CustomDenseChunkedMatrix.hpp:25
bool require_minimum_cache
Definition CustomDenseChunkedMatrix.hpp:38
size_t maximum_cache_size
Definition CustomDenseChunkedMatrix.hpp:31
Statistics for slab caching.
Definition SlabCacheStats.hpp:20