tatami_hdf5
tatami bindings for HDF5-backed matrices
DenseMatrix.hpp
Go to the documentation of this file.
1#ifndef TATAMI_HDF5_DENSE_MATRIX_HPP
2#define TATAMI_HDF5_DENSE_MATRIX_HPP
3
4#include "serialize.hpp"
5#include "utils.hpp"
6
7#include <string>
8#include <type_traits>
9#include <cmath>
10#include <vector>
11
12#include "H5Cpp.h"
13#include "tatami_chunked/tatami_chunked.hpp"
14#include "sanisizer/sanisizer.hpp"
15
22namespace tatami_hdf5 {
23
27struct DenseMatrixOptions {
37 std::size_t maximum_cache_size = sanisizer::cap<std::size_t>(100000000);
38
44 bool require_minimum_cache = true;
45};
46
50namespace DenseMatrix_internal {
51
52// All HDF5-related members.
53struct Components {
54 H5::H5File file;
55 H5::DataSet dataset;
56 H5::DataSpace dataspace;
57 H5::DataSpace memspace;
58};
59
60// In all cases, we know that the dimension extents can be safely cast between hsize_t and Index_,
61// because we checked for a safe cast (via the ChunkDimensionStats constructor) in the DenseMatrix constructor.
62// This is in addition to knowing that the extents can be safely cast to size_t as per the tatami contract.
63
64template<typename Index_, typename OutputValue_>
65void extract_block(bool h5_row_is_target, Index_ cache_start, Index_ cache_length, Index_ block_start, Index_ block_length, OutputValue_* buffer, Components& comp) {
66 hsize_t offset[2];
67 hsize_t count[2];
68
69 int target_dim = 1 - h5_row_is_target;
70 offset[target_dim] = cache_start;
71 count[target_dim] = cache_length;
72
73 int non_target_dim = h5_row_is_target;
74 offset[non_target_dim] = block_start;
75 count[non_target_dim] = block_length;
76 comp.dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
77
78 // HDF5 is a lot faster when the memspace and dataspace match in dimensionality.
79 // Presumably there is some shuffling that happens inside when dimensions don't match.
80 comp.memspace.setExtentSimple(2, count);
81 comp.memspace.selectAll();
82
83 comp.dataset.read(buffer, define_mem_type<OutputValue_>(), comp.memspace, comp.dataspace);
84}
85
86template<typename Index_, typename TmpValue_, typename OutputValue_>
87void extract_indices(
88 bool h5_row_is_target,
89 Index_ chunk_start,
90 Index_ chunk_length,
91 const std::vector<Index_>& indices,
92 TmpValue_* tmp_buffer,
93 OutputValue_* final_buffer,
94 Components& comp
95) {
96 const Index_ num_indices = indices.size();
97 if (num_indices == 0) {
98 return;
99 }
100
101 hsize_t offset[2];
102 hsize_t count[2];
103
104 int target_dim = 1 - h5_row_is_target;
105 offset[target_dim] = chunk_start;
106 count[target_dim] = chunk_length;
107
108 // The strategy here is to just extract the entire range into our tmp_buffer and then cherry-pick the indices we want into the final_buffer.
109 // This is because hyperslab unions are still brutally slow as of HDF5 1.14.6, so it's not worth creating a non-contiguous union.
110 int non_target_dim = h5_row_is_target;
111 const auto first_index = indices.front();
112 offset[non_target_dim] = first_index;
113 const auto non_target_range = indices.back() - first_index + 1;
114 count[non_target_dim] = non_target_range;
115
116 comp.dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
117 comp.memspace.setExtentSimple(2, count);
118 comp.memspace.selectAll();
119 comp.dataset.read(tmp_buffer, define_mem_type<TmpValue_>(), comp.memspace, comp.dataspace);
120
121 if (h5_row_is_target) {
122 for (Index_ t = 0; t < chunk_length; ++t) {
123 for (Index_ i = 0; i < num_indices; ++i) {
124 const auto in_offset = sanisizer::nd_offset<std::size_t>(indices[i] - first_index, non_target_range, t);
125 const auto out_offset = sanisizer::nd_offset<std::size_t>(i, num_indices, t);
126 final_buffer[out_offset] = tmp_buffer[in_offset];
127 }
128 }
129 } else {
130 for (Index_ i = 0; i < num_indices; ++i) {
131 const auto in_offset = sanisizer::product_unsafe<std::size_t>(chunk_length, indices[i] - first_index);
132 const auto out_offset = sanisizer::product_unsafe<std::size_t>(chunk_length, i);
133 std::copy_n(tmp_buffer + in_offset, chunk_length, final_buffer + out_offset);
134 }
135 }
136}
137
138/********************
139 *** Core classes ***
140 ********************/
141
142// We store the Components in a pointer so that we can serialize their
143// construction and destruction.
144
145inline void initialize(const std::string& file_name, const std::string& dataset_name, std::unique_ptr<Components>& h5comp) {
146 serialize([&]() -> void {
147 h5comp.reset(new Components);
148
149 // Turn off HDF5's caching, as we'll be handling that.
150 // This allows us to parallelize extractions without locking when the data has already been loaded into memory.
151 // If we just used HDF5's cache, we would have to lock on every extraction, given the lack of thread safety.
152 H5::FileAccPropList fapl(H5::FileAccPropList::DEFAULT.getId());
153 fapl.setCache(0, 0, 0, 0);
154
155 h5comp->file.openFile(file_name, H5F_ACC_RDONLY, fapl);
156 h5comp->dataset = h5comp->file.openDataSet(dataset_name);
157 h5comp->dataspace = h5comp->dataset.getSpace();
158 });
159}
160
161inline void destroy(std::unique_ptr<Components>& h5comp) {
162 serialize([&]() -> void {
163 h5comp.reset();
164 });
165}
166
167template<bool oracle_, typename Index_, typename CachedValue_>
168class SoloCore {
169public:
170 SoloCore(
171 const std::string& file_name,
172 const std::string& dataset_name,
173 [[maybe_unused]] tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats, // only listed here for compatibility with the other constructors.
174 tatami::MaybeOracle<oracle_, Index_> oracle,
175 [[maybe_unused]] const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
176 const bool h5_row_is_target,
177 const Index_ non_target_range_for_indexed
178 ) :
179 my_oracle(std::move(oracle)),
180 my_h5_row_is_target(h5_row_is_target),
181 my_non_target_buffer_for_indexed(sanisizer::cast<decltype(my_non_target_buffer_for_indexed.size())>(non_target_range_for_indexed))
182 {
183 initialize(file_name, dataset_name, my_h5comp);
184 }
185
186 ~SoloCore() {
187 destroy(my_h5comp);
188 }
189
190private:
191 std::unique_ptr<Components> my_h5comp;
192 tatami::MaybeOracle<oracle_, Index_> my_oracle;
193 typename std::conditional<oracle_, tatami::PredictionIndex, bool>::type my_counter = 0;
194 bool my_h5_row_is_target;
195 std::vector<CachedValue_> my_non_target_buffer_for_indexed;
196
197public:
198 template<typename Value_>
199 const Value_* fetch_block(Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
200 if constexpr(oracle_) {
201 i = my_oracle->get(my_counter++);
202 }
203 serialize([&]() -> void {
204 extract_block(my_h5_row_is_target, i, static_cast<Index_>(1), block_start, block_length, buffer, *my_h5comp);
205 });
206 return buffer;
207 }
208
209 template<typename Value_>
210 const Value_* fetch_indices(Index_ i, const std::vector<Index_>& indices, Value_* buffer) {
211 if constexpr(oracle_) {
212 i = my_oracle->get(my_counter++);
213 }
214 serialize([&]() -> void {
215 extract_indices(my_h5_row_is_target, i, static_cast<Index_>(1), indices, my_non_target_buffer_for_indexed.data(), buffer, *my_h5comp);
216 });
217 return buffer;
218 }
219};
220
221template<typename Index_, typename CachedValue_>
222class MyopicCore {
223public:
224 MyopicCore(
225 const std::string& file_name,
226 const std::string& dataset_name,
227 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
228 [[maybe_unused]] tatami::MaybeOracle<false, Index_>, // for consistency with the oracular version.
229 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
230 const bool h5_row_is_target,
231 const Index_ non_target_range_for_indexed
232 ) :
233 my_dim_stats(std::move(target_dim_stats)),
234 my_factory(slab_stats),
235 my_cache(slab_stats.max_slabs_in_cache),
236 my_h5_row_is_target(h5_row_is_target),
237 my_non_target_buffer_for_indexed(sanisizer::product<decltype(my_non_target_buffer_for_indexed.size())>(non_target_range_for_indexed, my_dim_stats.chunk_length))
238 {
239 initialize(file_name, dataset_name, my_h5comp);
240 if (!my_h5_row_is_target) {
241 sanisizer::resize(my_transposition_buffer, slab_stats.slab_size_in_elements);
242 }
243 }
244
245 ~MyopicCore() {
246 destroy(my_h5comp);
247 }
248
249private:
250 std::unique_ptr<Components> my_h5comp;
251 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
252
253 tatami_chunked::DenseSlabFactory<CachedValue_> my_factory;
254 typedef typename decltype(my_factory)::Slab Slab;
255 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
256
257 bool my_h5_row_is_target;
258 std::vector<CachedValue_> my_transposition_buffer;
259
260 std::vector<CachedValue_> my_non_target_buffer_for_indexed;
261
262private:
263 template<typename Value_, class Extract_>
264 void fetch_raw(Index_ i, Value_* buffer, Index_ non_target_length, Extract_ extract) {
265 Index_ chunk = i / my_dim_stats.chunk_length;
266 Index_ index = i % my_dim_stats.chunk_length;
267
268 const auto& info = my_cache.find(
269 chunk,
270 /* create = */ [&]() -> Slab {
271 return my_factory.create();
272 },
273 /* populate = */ [&](Index_ id, Slab& contents) -> void {
274 auto curdim = tatami_chunked::get_chunk_length(my_dim_stats, id);
275
276 if (my_h5_row_is_target) {
277 serialize([&]() -> void {
278 extract(id * my_dim_stats.chunk_length, curdim, contents.data);
279 });
280
281 } else {
282 // Transposing the data for easier retrieval, but only once the lock is released.
283 auto tptr = my_transposition_buffer.data();
284 serialize([&]() -> void {
285 extract(id * my_dim_stats.chunk_length, curdim, tptr);
286 });
287 tatami::transpose(tptr, non_target_length, curdim, contents.data);
288 }
289 }
290 );
291
292 auto ptr = info.data + sanisizer::product_unsafe<std::size_t>(non_target_length, index);
293 std::copy_n(ptr, non_target_length, buffer);
294 }
295
296public:
297 template<typename Value_>
298 const Value_* fetch_block(Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
299 fetch_raw(
300 i,
301 buffer,
302 block_length,
303 [&](Index_ start, Index_ length, CachedValue_* buf) -> void {
304 extract_block(my_h5_row_is_target, start, length, block_start, block_length, buf, *my_h5comp);
305 }
306 );
307 return buffer;
308 }
309
310 template<typename Value_>
311 const Value_* fetch_indices(Index_ i, const std::vector<Index_>& indices, Value_* buffer) {
312 fetch_raw(
313 i,
314 buffer,
315 indices.size(),
316 [&](Index_ start, Index_ length, CachedValue_* buf) -> void {
317 extract_indices(my_h5_row_is_target, start, length, indices, my_non_target_buffer_for_indexed.data(), buf, *my_h5comp);
318 }
319 );
320 return buffer;
321 }
322};
323
324template<typename Index_, typename CachedValue_>
325class OracularBlockNormal {
326public:
327 OracularBlockNormal(
328 const std::string& file_name,
329 const std::string& dataset_name,
330 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
331 tatami::MaybeOracle<true, Index_> oracle,
332 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
333 [[maybe_unused]] const bool h5_row_is_target, // for consistency with the solo/myopic version
334 [[maybe_unused]] const Index_ non_target_range_for_indexed
335 ) :
336 my_dim_stats(std::move(target_dim_stats)),
337 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache),
338 my_slab_size(slab_stats.slab_size_in_elements),
339 my_memory_pool(sanisizer::product<decltype(my_memory_pool.size())>(slab_stats.max_slabs_in_cache, my_slab_size))
340 {
341 initialize(file_name, dataset_name, my_h5comp);
342 }
343
344 ~OracularBlockNormal() {
345 destroy(my_h5comp);
346 }
347
348private:
349 std::unique_ptr<Components> my_h5comp;
350 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
351
352 struct Slab {
353 std::size_t offset;
354 };
355
356 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, true> my_cache;
357 std::size_t my_slab_size;
358 std::vector<CachedValue_> my_memory_pool;
359 std::size_t my_offset = 0;
360
361private:
362 template<class Function_>
363 static void sort_by_field(std::vector<std::pair<Index_, Slab*> >& indices, Function_ field) {
364 auto comp = [&field](const std::pair<Index_, Slab*>& l, const std::pair<Index_, Slab*>& r) -> bool {
365 return field(l) < field(r);
366 };
367 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
368 std::sort(indices.begin(), indices.end(), comp);
369 }
370 }
371
372public:
373 template<typename Value_>
374 const Value_* fetch_block([[maybe_unused]] Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
375 auto info = my_cache.next(
376 /* identify = */ [&](Index_ current) -> std::pair<Index_, Index_> {
377 return std::pair<Index_, Index_>(current / my_dim_stats.chunk_length, current % my_dim_stats.chunk_length);
378 },
379 /* create = */ [&]() -> Slab {
380 Slab output;
381 output.offset = my_offset;
382 my_offset += my_slab_size;
383 return output;
384 },
385 /* populate = */ [&](std::vector<std::pair<Index_, Slab*> >& chunks, std::vector<std::pair<Index_, Slab*> >& to_reuse) -> void {
386 // Defragmenting the existing chunks. We sort by offset to make
387 // sure that we're not clobbering in-use slabs during the copy().
388 sort_by_field(to_reuse, [](const std::pair<Index_, Slab*>& x) -> std::size_t { return x.second->offset; });
389
390 const auto dest = my_memory_pool.data();
391 std::size_t reused_offset = 0;
392 for (auto& x : to_reuse) {
393 auto& cur_offset = x.second->offset;
394 if (cur_offset != reused_offset) {
395 std::copy_n(dest + cur_offset, my_slab_size, dest + reused_offset);
396 cur_offset = reused_offset;
397 }
398 reused_offset += my_slab_size;
399 }
400
401 // If we don't have to transpose, we can extract data directly into the cache buffer.
402 // To do so, we try to form as many contiguous hyperslabs as possible, reducing the number of calls into the HDF5 library.
403 // Then we update the slab pointers to refer to the relevant slices of memory in the cache.
404 // We don't use hyperslab unions because they're painfully slow for non-contiguous hyperslabs.
405 sort_by_field(chunks, [](const std::pair<Index_, Slab*>& x) -> Index_ { return x.first; });
406
407 Index_ run_chunk_id = chunks.front().first;
408 Index_ run_length = tatami_chunked::get_chunk_length(my_dim_stats, run_chunk_id);
409 auto run_offset = reused_offset;
410 chunks.front().second->offset = run_offset;
411 auto total_used_offset = run_offset + my_slab_size;
412 Index_ last_chunk_id = run_chunk_id;
413
414 hsize_t count[2];
415 hsize_t offset[2];
416 count[1] = block_length;
417 offset[1] = block_start;
418
419 serialize([&]() -> void {
420 for (decltype(chunks.size()) ci = 1, cend = chunks.size(); ci < cend; ++ci) {
421 const auto& current_chunk = chunks[ci];
422 const auto current_chunk_id = current_chunk.first;
423
424 if (current_chunk_id - last_chunk_id > 1) { // save the existing run of chunks as one hyperslab, and start a new run.
425 count[0] = run_length;
426 offset[0] = run_chunk_id * my_dim_stats.chunk_length;
427 my_h5comp->memspace.setExtentSimple(2, count);
428 my_h5comp->memspace.selectAll();
429 my_h5comp->dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
430 my_h5comp->dataset.read(dest + run_offset, define_mem_type<CachedValue_>(), my_h5comp->memspace, my_h5comp->dataspace);
431
432 run_chunk_id = current_chunk_id;
433 run_length = 0;
434 run_offset = total_used_offset;
435 }
436
437 run_length += tatami_chunked::get_chunk_length(my_dim_stats, current_chunk_id);
438 current_chunk.second->offset = total_used_offset;
439 total_used_offset += my_slab_size;
440 last_chunk_id = current_chunk.first;
441 }
442
443 count[0] = run_length;
444 offset[0] = run_chunk_id * my_dim_stats.chunk_length;
445 my_h5comp->memspace.setExtentSimple(2, count);
446 my_h5comp->memspace.selectAll();
447 my_h5comp->dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
448 my_h5comp->dataset.read(dest + run_offset, define_mem_type<CachedValue_>(), my_h5comp->memspace, my_h5comp->dataspace);
449 });
450 }
451 );
452
453 auto ptr = my_memory_pool.data() + info.first->offset + sanisizer::product_unsafe<std::size_t>(block_length, info.second);
454 std::copy_n(ptr, block_length, buffer);
455 return buffer;
456 }
457};
458
459template<typename Index_, class Slab_, typename CachedValue_>
460CachedValue_* transpose_chunks(
461 std::vector<std::pair<Index_, Slab_*> >& chunks,
462 const tatami_chunked::ChunkDimensionStats<Index_>& dim_stats,
463 const Index_ non_target_length,
464 CachedValue_* transposition_buffer_ptr
465) {
466 if (non_target_length > 1) {
467 for (const auto& current_chunk : chunks) {
468 const Index_ chunk_length = tatami_chunked::get_chunk_length(dim_stats, current_chunk.first);
469 if (chunk_length <= 1) {
470 continue;
471 }
472
473 // We actually swap the pointers here, so the slab pointers might not point to the factory pool after this!
474 // Shouldn't matter as long as neither of them is exposed to the user.
475 auto& dest = current_chunk.second->data;
476 tatami::transpose(dest, non_target_length, chunk_length, transposition_buffer_ptr);
477 std::swap(dest, transposition_buffer_ptr);
478 }
479 }
480
481 return transposition_buffer_ptr;
482}
483
484template<typename Index_, typename CachedValue_>
485class OracularBlockTransposed {
486public:
487 OracularBlockTransposed(
488 const std::string& file_name,
489 const std::string& dataset_name,
490 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
491 tatami::MaybeOracle<true, Index_> oracle,
492 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
493 [[maybe_unused]] const bool h5_row_is_target, // for consistency with the solo/myopic versions
494 [[maybe_unused]] const Index_ non_target_range_for_indexed
495 ) :
496 my_dim_stats(std::move(target_dim_stats)),
497 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache),
498 my_slab_size(slab_stats.slab_size_in_elements),
499 my_memory_pool(sanisizer::product<decltype(my_memory_pool.size())>(slab_stats.max_slabs_in_cache, my_slab_size))
500 {
501 initialize(file_name, dataset_name, my_h5comp);
502 sanisizer::resize(my_transposition_buffer, my_slab_size);
503 my_transposition_buffer_ptr = my_transposition_buffer.data();
504 }
505
506 ~OracularBlockTransposed() {
507 destroy(my_h5comp);
508 }
509
510private:
511 std::unique_ptr<Components> my_h5comp;
512 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
513
514 struct Slab {
515 CachedValue_* data;
516 };
517
518 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, false> my_cache;
519 std::size_t my_slab_size;
520 std::vector<CachedValue_> my_memory_pool;
521 std::size_t my_offset = 0;
522
523 bool my_h5_row_is_target;
524 std::vector<CachedValue_> my_transposition_buffer;
525 CachedValue_* my_transposition_buffer_ptr;
526
527public:
528 template<typename Value_>
529 const Value_* fetch_block([[maybe_unused]] Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
530 auto info = my_cache.next(
531 /* identify = */ [&](Index_ current) -> std::pair<Index_, Index_> {
532 return std::pair<Index_, Index_>(current / my_dim_stats.chunk_length, current % my_dim_stats.chunk_length);
533 },
534 /* create = */ [&]() -> Slab {
535 Slab output;
536 output.data = my_memory_pool.data() + my_offset;
537 my_offset += my_slab_size;
538 return output;
539 },
540 /* populate = */ [&](std::vector<std::pair<Index_, Slab*> >& chunks) -> void {
541 // If we have to transpose, we extract slab-by-slab and transpose each one as it comes in.
542 // It's too hard to try to do an in-place transposition after reading everything into the cache.
543 hsize_t count[2];
544 hsize_t offset[2];
545 count[0] = block_length;
546 offset[0] = block_start;
547
548 serialize([&]() -> void {
549 for (const auto& current_chunk : chunks) {
550 const auto chunk_length = tatami_chunked::get_chunk_length(my_dim_stats, current_chunk.first);
551 count[1] = chunk_length;
552 offset[1] = current_chunk.first * my_dim_stats.chunk_length;
553 my_h5comp->memspace.setExtentSimple(2, count);
554 my_h5comp->memspace.selectAll();
555 my_h5comp->dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
556 my_h5comp->dataset.read(current_chunk.second->data, define_mem_type<CachedValue_>(), my_h5comp->memspace, my_h5comp->dataspace);
557 }
558 });
559
560 // Transposition is done outside the serial section to unblock other threads.
561 my_transposition_buffer_ptr = transpose_chunks(chunks, my_dim_stats, block_length, my_transposition_buffer_ptr);
562 }
563 );
564
565 auto ptr = info.first->data + sanisizer::product_unsafe<std::size_t>(block_length, info.second);
566 std::copy_n(ptr, block_length, buffer);
567 return buffer;
568 }
569};
570
571template<typename Index_, typename CachedValue_>
572class OracularIndexed {
573public:
574 OracularIndexed(
575 const std::string& file_name,
576 const std::string& dataset_name,
577 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
578 tatami::MaybeOracle<true, Index_> oracle,
579 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
580 const bool h5_row_is_target,
581 const Index_ non_target_range_for_indexed
582 ) :
583 my_dim_stats(std::move(target_dim_stats)),
584 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache),
585 my_slab_size(slab_stats.slab_size_in_elements),
586 my_memory_pool(sanisizer::product<decltype(my_memory_pool.size())>(slab_stats.max_slabs_in_cache, my_slab_size)),
587 my_h5_row_is_target(h5_row_is_target),
588 my_non_target_buffer_for_indexed(sanisizer::product<decltype(my_non_target_buffer_for_indexed.size())>(non_target_range_for_indexed, my_dim_stats.chunk_length))
589 {
590 initialize(file_name, dataset_name, my_h5comp);
591 if (!my_h5_row_is_target) {
592 sanisizer::resize(my_transposition_buffer, my_slab_size);
593 my_transposition_buffer_ptr = my_transposition_buffer.data();
594 }
595 }
596
597 ~OracularIndexed() {
598 destroy(my_h5comp);
599 }
600
601private:
602 std::unique_ptr<Components> my_h5comp;
603 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
604
605 struct Slab {
606 CachedValue_* data;
607 };
608
609 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, false> my_cache;
610 std::size_t my_slab_size;
611 std::vector<CachedValue_> my_memory_pool;
612 std::size_t my_offset = 0;
613
614 bool my_h5_row_is_target;
615 std::vector<CachedValue_> my_transposition_buffer;
616 CachedValue_* my_transposition_buffer_ptr;
617
618 std::vector<CachedValue_> my_non_target_buffer_for_indexed;
619
620public:
621 template<typename Value_>
622 const Value_* fetch_indices([[maybe_unused]] Index_ i, const std::vector<Index_>& indices, Value_* buffer) {
623 const Index_ num_indices = indices.size();
624 auto info = my_cache.next(
625 /* identify = */ [&](Index_ current) -> std::pair<Index_, Index_> {
626 return std::pair<Index_, Index_>(current / my_dim_stats.chunk_length, current % my_dim_stats.chunk_length);
627 },
628 /* create = */ [&]() -> Slab {
629 Slab output;
630 output.data = my_memory_pool.data() + my_offset;
631 my_offset += my_slab_size;
632 return output;
633 },
634 /* populate = */ [&](std::vector<std::pair<Index_, Slab*> >& chunks) -> void {
635 serialize([&]() -> void {
636 for (const auto& current_chunk : chunks) {
637 extract_indices(
638 my_h5_row_is_target,
639 current_chunk.first * my_dim_stats.chunk_length,
640 tatami_chunked::get_chunk_length(my_dim_stats, current_chunk.first),
641 indices,
642 my_non_target_buffer_for_indexed.data(),
643 current_chunk.second->data,
644 *my_h5comp
645 );
646 }
647 });
648
649 // Transposition is done outside the serial section to unblock other threads.
650 if (!my_h5_row_is_target) {
651 my_transposition_buffer_ptr = transpose_chunks(chunks, my_dim_stats, num_indices, my_transposition_buffer_ptr);
652 }
653 }
654 );
655
656 auto ptr = info.first->data + sanisizer::product_unsafe<std::size_t>(num_indices, info.second);
657 std::copy_n(ptr, num_indices, buffer);
658 return buffer;
659 }
660};
661
662// COMMENT: technically, for all oracular extractors, we could pretend that the chunk length on the target dimension is 1.
663// This would allow us to only extract the desired indices on each call, allowing for more efficient use of the cache.
664//
665// The problem is that we would get harshly penalized for any chunk reuse outside of the current prediction cycle,
666// where the chunk would need to be read from disk again if the exact elements weren't already in the cache.
667// Consider a chunk length of 2 where we want to access the following indices {0, 2, 4, 5, 2, 3} and our cache is large enough to hold 4 elements.
668//
669// - With our current approach, the first prediction cycle would load the first and second chunks, i.e., {0, 1, 2, 3}.
670// The next prediction cycle would load the third chunk and re-use the second chunk to get {2, 3, 4, 5}.
671// - With a chunk length of 1, the first prediction cycle would load all three chunks but only keep {0, 2, 4, 5}.
672// The next prediction cycle would need to reload the second chunk to get {3}.
673//
674// This access pattern might not be uncommon after applying a DelayedSubset with shuffled rows/columns.
675
676/***************************
677 *** Concrete subclasses ***
678 ***************************/
679
680enum class OracularMode : char { NOT_APPLICABLE, BLOCK_NORMAL, BLOCK_TRANSPOSED, INDEXED };
681
682template<bool solo_, bool oracle_, OracularMode omode_, typename Index_, typename CachedValue_>
683using DenseCore = typename std::conditional<solo_,
684 SoloCore<oracle_, Index_, CachedValue_>,
685 typename std::conditional<!oracle_,
686 MyopicCore<Index_, CachedValue_>,
687 typename std::conditional<omode_ == OracularMode::BLOCK_NORMAL,
688 OracularBlockNormal<Index_, CachedValue_>,
689 typename std::conditional<omode_ == OracularMode::BLOCK_TRANSPOSED,
690 OracularBlockTransposed<Index_, CachedValue_>,
691 OracularIndexed<Index_, CachedValue_>
692 >::type
693 >::type
694 >::type
695>::type;
696
697template<bool solo_, bool oracle_, OracularMode omode_, typename Value_, typename Index_, typename CachedValue_>
698class Full final : public tatami::DenseExtractor<oracle_, Value_, Index_> {
699public:
700 Full(
701 const std::string& file_name,
702 const std::string& dataset_name,
703 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
704 tatami::MaybeOracle<oracle_, Index_> oracle,
705 Index_ non_target_dim,
706 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
707 bool h5_row_is_target
708 ) :
709 my_core(
710 file_name,
711 dataset_name,
712 std::move(target_dim_stats),
713 std::move(oracle),
714 slab_stats,
715 h5_row_is_target,
716 0
717 ),
718 my_non_target_dim(non_target_dim)
719 {}
720
721 const Value_* fetch(Index_ i, Value_* buffer) {
722 return my_core.fetch_block(i, 0, my_non_target_dim, buffer);
723 }
724
725private:
726 DenseCore<solo_, oracle_, omode_, Index_, CachedValue_> my_core;
727 Index_ my_non_target_dim;
728};
729
730template<bool solo_, bool oracle_, OracularMode omode_, typename Value_, typename Index_, typename CachedValue_>
731class Block final : public tatami::DenseExtractor<oracle_, Value_, Index_> {
732public:
733 Block(
734 const std::string& file_name,
735 const std::string& dataset_name,
736 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
737 tatami::MaybeOracle<oracle_, Index_> oracle,
738 Index_ block_start,
739 Index_ block_length,
740 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
741 const bool h5_row_is_target
742 ) :
743 my_core(
744 file_name,
745 dataset_name,
746 std::move(target_dim_stats),
747 std::move(oracle),
748 slab_stats,
749 h5_row_is_target,
750 0
751 ),
752 my_block_start(block_start),
753 my_block_length(block_length)
754 {}
755
756 const Value_* fetch(Index_ i, Value_* buffer) {
757 return my_core.fetch_block(i, my_block_start, my_block_length, buffer);
758 }
759
760private:
761 DenseCore<solo_, oracle_, omode_, Index_, CachedValue_> my_core;
762 Index_ my_block_start, my_block_length;
763};
764
765template<bool solo_, bool oracle_, OracularMode omode_, typename Value_, typename Index_, typename CachedValue_>
766class Index final : public tatami::DenseExtractor<oracle_, Value_, Index_> {
767public:
768 Index(
769 const std::string& file_name,
770 const std::string& dataset_name,
771 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
772 tatami::MaybeOracle<oracle_, Index_> oracle,
773 tatami::VectorPtr<Index_> indices_ptr,
774 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
775 const bool h5_row_is_target
776 ) :
777 my_core(
778 file_name,
779 dataset_name,
780 std::move(target_dim_stats),
781 std::move(oracle),
782 slab_stats,
783 h5_row_is_target,
784 indices_ptr->empty() ? 0 : (indices_ptr->back() - indices_ptr->front() + 1)
785 ),
786 my_indices_ptr(std::move(indices_ptr))
787 {}
788
789 const Value_* fetch(Index_ i, Value_* buffer) {
790 return my_core.fetch_indices(i, *my_indices_ptr, buffer);
791 }
792
793private:
794 DenseCore<solo_, oracle_, omode_, Index_, CachedValue_> my_core;
795 tatami::VectorPtr<Index_> my_indices_ptr;
796};
797
798}
827template<typename Value_, typename Index_, typename CachedValue_ = Value_>
828class DenseMatrix final : public tatami::Matrix<Value_, Index_> {
829 std::string my_file_name, my_dataset_name;
830 bool my_transpose;
831
832 std::size_t my_cache_size_in_elements;
833 bool my_require_minimum_cache;
834
835 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
836 bool my_prefer_firstdim;
837
838public:
847 DenseMatrix(std::string file, std::string name, bool transpose, const DenseMatrixOptions& options) :
848 my_file_name(std::move(file)),
849 my_dataset_name(std::move(name)),
850 my_transpose(transpose),
851 my_cache_size_in_elements(options.maximum_cache_size / sizeof(CachedValue_)),
852 my_require_minimum_cache(options.require_minimum_cache)
853 {
854 serialize([&]() -> void {
855 H5::H5File fhandle(my_file_name, H5F_ACC_RDONLY);
856 auto dhandle = open_and_check_dataset<false>(fhandle, my_dataset_name);
857 auto dims = get_array_dimensions<2>(dhandle, my_dataset_name);
858
859 hsize_t chunk_dims[2];
860 auto dparms = dhandle.getCreatePlist();
861 if (dparms.getLayout() != H5D_CHUNKED) {
862 // If the layout is contiguous, each slice along the first dimension is treated as its own chunk.
863 chunk_dims[0] = 1;
864 chunk_dims[1] = dims[1];
865 } else {
866 dparms.getChunk(2, chunk_dims);
867 }
868
869 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(
870 sanisizer::cast<Index_>(dims[0]),
871 sanisizer::cast<Index_>(chunk_dims[0])
872 );
873 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(
874 sanisizer::cast<Index_>(dims[1]),
875 sanisizer::cast<Index_>(chunk_dims[1])
876 );
877 });
878
879 // Favoring extraction on the dimension that involves pulling out fewer
880 // chunks per dimension element. Remember, 'firstdim_stats.num_chunks'
881 // represents the number of chunks along the first dimension, and thus
882 // is the number of chunks that need to be loaded to pull out an
883 // element of the **second** dimension; and vice versa.
884 my_prefer_firstdim = (my_firstdim_stats.num_chunks > my_seconddim_stats.num_chunks);
885 }
886
893 DenseMatrix(std::string file, std::string name, bool transpose) :
894 DenseMatrix(std::move(file), std::move(name), transpose, DenseMatrixOptions()) {}
895
896private:
897 bool prefer_rows_internal() const {
898 if (my_transpose) {
899 return !my_prefer_firstdim;
900 } else {
901 return my_prefer_firstdim;
902 }
903 }
904
905 Index_ nrow_internal() const {
906 if (my_transpose) {
907 return my_seconddim_stats.dimension_extent;
908 } else {
909 return my_firstdim_stats.dimension_extent;
910 }
911 }
912
913 Index_ ncol_internal() const {
914 if (my_transpose) {
915 return my_firstdim_stats.dimension_extent;
916 } else {
917 return my_seconddim_stats.dimension_extent;
918 }
919 }
920
921public:
922 Index_ nrow() const {
923 return nrow_internal();
924 }
925
926 Index_ ncol() const {
927 return ncol_internal();
928 }
929
937 bool prefer_rows() const {
938 return prefer_rows_internal();
939 }
940
941 double prefer_rows_proportion() const {
942 return static_cast<double>(prefer_rows_internal());
943 }
944
945 bool uses_oracle(bool) const {
946 return true;
947 }
948
949 bool is_sparse() const {
950 return false;
951 }
952
953 double is_sparse_proportion() const {
954 return 0;
955 }
956
957 using tatami::Matrix<Value_, Index_>::dense;
958
959 using tatami::Matrix<Value_, Index_>::sparse;
960
961private:
962 template<bool oracle_, template<bool, bool, DenseMatrix_internal::OracularMode, typename, typename, typename> class Extractor_, typename ... Args_>
963 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate(
964 bool row,
965 Index_ non_target_length,
966 tatami::MaybeOracle<oracle_, Index_> oracle,
967 Args_&& ... args
968 ) const {
969 bool by_h5_row = (row != my_transpose);
970 const auto& dim_stats = (by_h5_row ? my_firstdim_stats : my_seconddim_stats);
971
972 tatami_chunked::SlabCacheStats<Index_> slab_stats(
973 dim_stats.chunk_length,
974 non_target_length,
975 dim_stats.num_chunks,
976 my_cache_size_in_elements,
977 my_require_minimum_cache
978 );
979
980 if (slab_stats.max_slabs_in_cache == 0) {
981 return std::make_unique<Extractor_<true, oracle_, DenseMatrix_internal::OracularMode::NOT_APPLICABLE, Value_, Index_, CachedValue_> >(
982 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
983 );
984 }
985
986 if constexpr(oracle_) {
987 typedef DenseMatrix_internal::Index<false, oracle_, DenseMatrix_internal::OracularMode::INDEXED, Value_, Index_, CachedValue_> CurIndex;
988 if constexpr(std::is_same<CurIndex, Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::INDEXED, Value_, Index_, CachedValue_> >::value) {
989 return std::make_unique<CurIndex>(
990 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
991 );
992 } else {
993 if (by_h5_row) {
994 return std::make_unique<Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::BLOCK_NORMAL, Value_, Index_, CachedValue_> >(
995 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
996 );
997 } else {
998 return std::make_unique<Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::BLOCK_TRANSPOSED, Value_, Index_, CachedValue_> >(
999 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
1000 );
1001 }
1002 }
1003 } else {
1004 return std::make_unique<Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::NOT_APPLICABLE, Value_, Index_, CachedValue_> >(
1005 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
1006 );
1007 }
1008 }
1009
1010 /********************
1011 *** Myopic dense ***
1012 ********************/
1013public:
1014 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(bool row, const tatami::Options&) const {
1015 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1016 return populate<false, DenseMatrix_internal::Full>(row, full_non_target, false, full_non_target);
1017 }
1018
1019 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(bool row, Index_ block_start, Index_ block_length, const tatami::Options&) const {
1020 return populate<false, DenseMatrix_internal::Block>(row, block_length, false, block_start, block_length);
1021 }
1022
1023 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(bool row, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options&) const {
1024 auto nidx = indices_ptr->size();
1025 return populate<false, DenseMatrix_internal::Index>(row, nidx, false, std::move(indices_ptr));
1026 }
1027
1028 /*********************
1029 *** Myopic sparse ***
1030 *********************/
1031public:
1032 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(bool row, const tatami::Options& opt) const {
1033 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1034 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), full_non_target, opt);
1035 }
1036
1037 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(bool row, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
1038 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
1039 }
1040
1041 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(bool row, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
1042 auto ptr = dense(row, indices_ptr, opt);
1043 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
1044 }
1045
1046 /**********************
1047 *** Oracular dense ***
1048 **********************/
1049public:
1050 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1051 bool row,
1052 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
1053 const tatami::Options&
1054 ) const {
1055 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1056 return populate<true, DenseMatrix_internal::Full>(row, full_non_target, std::move(oracle), full_non_target);
1057 }
1058
1059 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1060 bool row,
1061 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
1062 Index_ block_start,
1063 Index_ block_length,
1064 const tatami::Options&
1065 ) const {
1066 return populate<true, DenseMatrix_internal::Block>(row, block_length, std::move(oracle), block_start, block_length);
1067 }
1068
1069 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1070 bool row,
1071 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
1072 tatami::VectorPtr<Index_> indices_ptr,
1073 const tatami::Options&
1074 ) const {
1075 auto nidx = indices_ptr->size();
1076 return populate<true, DenseMatrix_internal::Index>(row, nidx, std::move(oracle), std::move(indices_ptr));
1077 }
1078
1079 /***********************
1080 *** Oracular sparse ***
1081 ***********************/
1082public:
1083 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1084 bool row,
1085 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
1086 const tatami::Options& opt
1087 ) const {
1088 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1089 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), opt), full_non_target, opt);
1090 }
1091
1092 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1093 bool row,
1094 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
1095 Index_ block_start,
1096 Index_ block_length,
1097 const tatami::Options& opt
1098 ) const {
1099 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), block_start, block_length, opt), block_start, block_length, opt);
1100 }
1101
1102 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1103 bool row,
1104 std::shared_ptr<const tatami::Oracle<Index_> > oracle,
1105 tatami::VectorPtr<Index_> indices_ptr,
1106 const tatami::Options& opt
1107 ) const {
1108 auto ptr = dense(row, std::move(oracle), indices_ptr, opt);
1109 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
1110 }
1111};
1112
1113}
1114
1115#endif
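
For orientation, here is a minimal usage sketch rather than part of the header: it assumes a two-dimensional double dataset named "matrix" in a file "data.h5" (both hypothetical names) and an umbrella include path of "tatami_hdf5/tatami_hdf5.hpp"; otherwise it only uses the DenseMatrixOptions, the DenseMatrix constructor and the dense() extractors defined above.

#include "tatami_hdf5/tatami_hdf5.hpp" // assumed umbrella header for the library
#include <vector>
#include <iostream>

int main() {
    // Cap the slab cache at ~50 MB; the matrix divides this by sizeof(CachedValue_) internally.
    tatami_hdf5::DenseMatrixOptions opt;
    opt.maximum_cache_size = 50000000;

    // 'transpose = false' means HDF5 rows map directly to matrix rows.
    tatami_hdf5::DenseMatrix<double, int> mat("data.h5", "matrix", false, opt);

    // Row-by-row extraction through the usual tatami interface.
    auto ext = mat.dense(/* row = */ true, tatami::Options());
    std::vector<double> buffer(mat.ncol());
    for (int r = 0, nr = mat.nrow(); r < nr; ++r) {
        const double* ptr = ext->fetch(r, buffer.data());
        std::cout << "row " << r << " starts with " << ptr[0] << "\n";
    }
    return 0;
}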