1#ifndef TATAMI_HDF5_DENSE_MATRIX_HPP
2#define TATAMI_HDF5_DENSE_MATRIX_HPP
13#include "tatami_chunked/tatami_chunked.hpp"
14#include "sanisizer/sanisizer.hpp"
50namespace DenseMatrix_internal {
// Persistent HDF5 handles reused across reads.  The file and dataset handles
// referenced elsewhere (comp.dataset, comp.file) are presumably declared in
// the non-visible part of this struct -- confirm against the full header.
56 H5::DataSpace dataspace;
57 H5::DataSpace memspace;
// Read a contiguous 2D rectangle from the dataset into 'buffer': the target
// dimension spans [cache_start, cache_start + cache_length) and the
// non-target dimension spans [block_start, block_start + block_length).
// NOTE(review): 'offset' and 'count' are hsize_t[2] arrays declared outside
// this excerpt -- confirm their declarations in the full file.
64template<
typename Index_,
typename OutputValue_>
65void extract_block(
bool h5_row_is_target, Index_ cache_start, Index_ cache_length, Index_ block_start, Index_ block_length, OutputValue_* buffer, Components& comp) {
// bool-to-int: when rows are the target, the target is HDF5 dimension 0.
69 int target_dim = 1 - h5_row_is_target;
70 offset[target_dim] = cache_start;
71 count[target_dim] = cache_length;
73 int non_target_dim = h5_row_is_target;
74 offset[non_target_dim] = block_start;
75 count[non_target_dim] = block_length;
76 comp.dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
// The in-memory dataspace matches the selected hyperslab exactly, so the
// read lands densely in 'buffer'.
80 comp.memspace.setExtentSimple(2, count);
81 comp.memspace.selectAll();
// Here the memory datatype correctly matches the buffer's element type.
83 comp.dataset.read(buffer, define_mem_type<OutputValue_>(), comp.memspace, comp.dataspace);
// Read an indexed subset of the non-target dimension for a run of target
// elements.  A single contiguous hyperslab covering the full range
// [indices.front(), indices.back()] is read into 'tmp_buffer', and the
// requested entries are then gathered into 'final_buffer'.  One wide read is
// cheaper than many single-element HDF5 selections.
// NOTE(review): 'offset'/'count' are hsize_t[2] arrays declared outside this
// excerpt -- confirm in the full file.
86template<
typename Index_,
typename TmpValue_,
typename OutputValue_>
88 bool h5_row_is_target,
91 const std::vector<Index_>& indices,
92 TmpValue_* tmp_buffer,
93 OutputValue_* final_buffer,
96 const Index_ num_indices = indices.size();
// Nothing requested: leave buffers untouched.
97 if (num_indices == 0) {
104 int target_dim = 1 - h5_row_is_target;
105 offset[target_dim] = chunk_start;
106 count[target_dim] = chunk_length;
110 int non_target_dim = h5_row_is_target;
111 const auto first_index = indices.front();
112 offset[non_target_dim] = first_index;
// 'indices' is assumed sorted, so back() - front() + 1 spans the request.
113 const auto non_target_range = indices.back() - first_index + 1;
114 count[non_target_dim] = non_target_range;
116 comp.dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
117 comp.memspace.setExtentSimple(2, count);
118 comp.memspace.selectAll();
// BUGFIX: this read fills 'tmp_buffer', an array of TmpValue_, so the
// in-memory datatype must describe TmpValue_.  The previous
// define_mem_type<OutputValue_>() made HDF5 write OutputValue_-sized
// elements into a TmpValue_ buffer, corrupting data (and potentially
// overflowing the allocation) whenever the two types differ.
119 comp.dataset.read(tmp_buffer, define_mem_type<TmpValue_>(), comp.memspace, comp.dataspace);
121 if (h5_row_is_target) {
// Target-major layout: gather element-by-element, skipping the gaps
// between requested indices in the fastest-varying dimension.
122 for (Index_ t = 0; t < chunk_length; ++t) {
123 for (Index_ i = 0; i < num_indices; ++i) {
124 const auto in_offset = sanisizer::nd_offset<std::size_t>(indices[i] - first_index, non_target_range, t);
125 const auto out_offset = sanisizer::nd_offset<std::size_t>(i, num_indices, t);
126 final_buffer[out_offset] = tmp_buffer[in_offset];
// Non-target-major layout: each requested index owns a contiguous run of
// 'chunk_length' values, so whole runs can be copied at once.
130 for (Index_ i = 0; i < num_indices; ++i) {
131 const auto in_offset = sanisizer::product_unsafe<std::size_t>(chunk_length, indices[i] - first_index);
132 const auto out_offset = sanisizer::product_unsafe<std::size_t>(chunk_length, i);
133 std::copy_n(tmp_buffer + in_offset, chunk_length, final_buffer + out_offset);
// Open the file and dataset, storing the persistent HDF5 handles in 'h5comp'.
// NOTE(review): presumably invoked under the global HDF5 serialization lock
// (see serialize()) -- confirm at the call sites.
145inline void initialize(
const std::string& file_name,
const std::string& dataset_name, std::unique_ptr<Components>& h5comp) {
147 h5comp.reset(
new Components);
// Disable HDF5's internal chunk cache: this file implements its own slab
// caching, so the library-level cache would only duplicate memory use.
152 H5::FileAccPropList fapl(H5::FileAccPropList::DEFAULT.getId());
153 fapl.setCache(0, 0, 0, 0);
155 h5comp->file.openFile(file_name, H5F_ACC_RDONLY, fapl);
156 h5comp->dataset = h5comp->file.openDataSet(dataset_name);
157 h5comp->dataspace = h5comp->dataset.getSpace();
// Release the HDF5 handles held by 'h5comp'; the body (not visible in this
// excerpt) presumably resets the pointer under the serialization lock.
161inline void destroy(std::unique_ptr<Components>& h5comp) {
// Cache-less core ("solo"): every fetch reads exactly one target-dimension
// element directly from the file.  Used when the cache cannot hold even a
// single slab (max_slabs_in_cache == 0).
167template<
bool oracle_,
typename Index_,
typename CachedValue_>
171 const std::string& file_name,
172 const std::string& dataset_name,
173 [[maybe_unused]] tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
175 [[maybe_unused]]
const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
176 const bool h5_row_is_target,
177 const Index_ non_target_range_for_indexed
179 my_oracle(std::move(oracle)),
180 my_h5_row_is_target(h5_row_is_target),
// Scratch buffer spanning the contiguous range of the requested indices,
// reused by extract_indices() for the single-row/column gather.
181 my_non_target_buffer_for_indexed(sanisizer::cast<decltype(my_non_target_buffer_for_indexed.size())>(non_target_range_for_indexed))
183 initialize(file_name, dataset_name, my_h5comp);
191 std::unique_ptr<Components> my_h5comp;
// With an oracle, this counts consumed predictions; without one it collapses
// to an unused bool so the member costs nothing.
193 typename std::conditional<oracle_, tatami::PredictionIndex, bool>::type my_counter = 0;
194 bool my_h5_row_is_target;
195 std::vector<CachedValue_> my_non_target_buffer_for_indexed;
198 template<
typename Value_>
199 const Value_* fetch_block(Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
200 if constexpr(oracle_) {
// The caller-supplied 'i' is ignored; the next predicted row/column wins.
201 i = my_oracle->get(my_counter++);
// Read a single target element (cache_length == 1) straight into 'buffer'.
204 extract_block(my_h5_row_is_target, i,
static_cast<Index_
>(1), block_start, block_length, buffer, *my_h5comp);
209 template<
typename Value_>
210 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
211 if constexpr(oracle_) {
212 i = my_oracle->get(my_counter++);
215 extract_indices(my_h5_row_is_target, i,
static_cast<Index_
>(1), indices, my_non_target_buffer_for_indexed.data(), buffer, *my_h5comp);
// LRU-cached core for use without an oracle: whole chunks along the target
// dimension are read and cached, with rows/columns served from the cache.
221template<
typename Index_,
typename CachedValue_>
225 const std::string& file_name,
226 const std::string& dataset_name,
227 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
229 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
230 const bool h5_row_is_target,
231 const Index_ non_target_range_for_indexed
233 my_dim_stats(std::move(target_dim_stats)),
234 my_factory(slab_stats),
235 my_cache(slab_stats.max_slabs_in_cache),
236 my_h5_row_is_target(h5_row_is_target),
// Scratch for indexed extraction: one contiguous index-range strip per
// target chunk row/column.
237 my_non_target_buffer_for_indexed(sanisizer::product<decltype(my_non_target_buffer_for_indexed.size())>(non_target_range_for_indexed, my_dim_stats.chunk_length))
239 initialize(file_name, dataset_name, my_h5comp);
// When the target is not the HDF5 row, reads arrive non-target-major and
// need an extra buffer for transposition into target-major slabs.
240 if (!my_h5_row_is_target) {
241 sanisizer::resize(my_transposition_buffer, slab_stats.slab_size_in_elements);
250 std::unique_ptr<Components> my_h5comp;
251 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
253 tatami_chunked::DenseSlabFactory<CachedValue_> my_factory;
254 typedef typename decltype(my_factory)::Slab Slab;
255 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
257 bool my_h5_row_is_target;
258 std::vector<CachedValue_> my_transposition_buffer;
260 std::vector<CachedValue_> my_non_target_buffer_for_indexed;
// Shared fetch path: locate the chunk containing 'i', populate it on a cache
// miss via 'extract', then copy the requested row/column out of the slab.
263 template<
typename Value_,
class Extract_>
264 void fetch_raw(Index_ i, Value_* buffer, Index_ non_target_length, Extract_ extract) {
265 Index_ chunk = i / my_dim_stats.chunk_length;
266 Index_ index = i % my_dim_stats.chunk_length;
268 const auto& info = my_cache.find(
271 return my_factory.create();
273 [&](Index_ id, Slab& contents) ->
void {
// The last chunk may be shorter than chunk_length.
274 auto curdim = tatami_chunked::get_chunk_length(my_dim_stats,
id);
276 if (my_h5_row_is_target) {
// Target-major on disk: read directly into the slab.
278 extract(
id * my_dim_stats.chunk_length, curdim, contents.data);
// Otherwise read into scratch; transposition into contents.data
// presumably happens in the non-visible lines that follow -- confirm.
283 auto tptr = my_transposition_buffer.data();
285 extract(
id * my_dim_stats.chunk_length, curdim, tptr);
// Slabs are target-major, so row 'index' is a contiguous run.
292 auto ptr = info.data + sanisizer::product_unsafe<std::size_t>(non_target_length, index);
293 std::copy_n(ptr, non_target_length, buffer);
297 template<
typename Value_>
298 const Value_* fetch_block(Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
303 [&](Index_ start, Index_ length, CachedValue_* buf) ->
void {
304 extract_block(my_h5_row_is_target, start, length, block_start, block_length, buf, *my_h5comp);
310 template<
typename Value_>
311 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
316 [&](Index_ start, Index_ length, CachedValue_* buf) ->
void {
317 extract_indices(my_h5_row_is_target, start, length, indices, my_non_target_buffer_for_indexed.data(), buf, *my_h5comp);
// Oracle-driven core for block extraction when HDF5 rows are the target:
// predicted chunks are compacted to the front of a shared memory pool and
// adjacent chunks are coalesced into single large HDF5 reads.
324template<
typename Index_,
typename CachedValue_>
325class OracularBlockNormal {
328 const std::string& file_name,
329 const std::string& dataset_name,
330 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
332 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
333 [[maybe_unused]]
const bool h5_row_is_target,
334 [[maybe_unused]]
const Index_ non_target_range_for_indexed
336 my_dim_stats(std::move(target_dim_stats)),
337 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache),
338 my_slab_size(slab_stats.slab_size_in_elements),
// One flat pool holds all slabs; individual slabs are addressed by offset.
339 my_memory_pool(sanisizer::product<decltype(my_memory_pool.size())>(slab_stats.max_slabs_in_cache, my_slab_size))
341 initialize(file_name, dataset_name, my_h5comp);
344 ~OracularBlockNormal() {
349 std::unique_ptr<Components> my_h5comp;
350 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
356 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, true> my_cache;
357 std::size_t my_slab_size;
358 std::vector<CachedValue_> my_memory_pool;
359 std::size_t my_offset = 0;
// Sort slab records in-place by an arbitrary key, skipping the sort when
// already ordered (the common case for sequential oracles).
362 template<
class Function_>
363 static void sort_by_field(std::vector<std::pair<Index_, Slab*> >& indices, Function_ field) {
364 auto comp = [&field](
const std::pair<Index_, Slab*>& l,
const std::pair<Index_, Slab*>& r) ->
bool {
365 return field(l) < field(r);
367 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
368 std::sort(indices.begin(), indices.end(), comp);
373 template<
typename Value_>
374 const Value_* fetch_block([[maybe_unused]] Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
375 auto info = my_cache.next(
// Map a predicted element to (chunk id, offset-in-chunk).
376 [&](Index_ current) -> std::pair<Index_, Index_> {
377 return std::pair<Index_, Index_>(current / my_dim_stats.chunk_length, current % my_dim_stats.chunk_length);
381 output.offset = my_offset;
382 my_offset += my_slab_size;
385 [&](std::vector<std::pair<Index_, Slab*> >& chunks, std::vector<std::pair<Index_, Slab*> >& to_reuse) ->
void {
// Phase 1: slide retained slabs down to the front of the pool so the
// freed space at the back is contiguous.  Sorting by offset first
// guarantees each move copies downward into already-vacated space.
388 sort_by_field(to_reuse, [](
const std::pair<Index_, Slab*>& x) -> std::size_t {
return x.second->offset; });
390 const auto dest = my_memory_pool.data();
391 std::size_t reused_offset = 0;
392 for (
auto& x : to_reuse) {
393 auto& cur_offset = x.second->offset;
394 if (cur_offset != reused_offset) {
// Forward copy is safe: destination offset <= source offset.
395 std::copy_n(dest + cur_offset, my_slab_size, dest + reused_offset);
396 cur_offset = reused_offset;
398 reused_offset += my_slab_size;
// Phase 2: read new chunks in ascending chunk order, merging runs of
// adjacent chunks into a single hyperslab read.
405 sort_by_field(chunks, [](
const std::pair<Index_, Slab*>& x) -> Index_ {
return x.first; });
407 Index_ run_chunk_id = chunks.front().first;
408 Index_ run_length = tatami_chunked::get_chunk_length(my_dim_stats, run_chunk_id);
409 auto run_offset = reused_offset;
410 chunks.front().second->offset = run_offset;
411 auto total_used_offset = run_offset + my_slab_size;
412 Index_ last_chunk_id = run_chunk_id;
416 count[1] = block_length;
417 offset[1] = block_start;
420 for (
decltype(chunks.size()) ci = 1, cend = chunks.size(); ci < cend; ++ci) {
421 const auto& current_chunk = chunks[ci];
422 const auto current_chunk_id = current_chunk.first;
// A gap in chunk ids ends the current run: flush it with one read.
424 if (current_chunk_id - last_chunk_id > 1) {
425 count[0] = run_length;
426 offset[0] = run_chunk_id * my_dim_stats.chunk_length;
427 my_h5comp->memspace.setExtentSimple(2, count);
428 my_h5comp->memspace.selectAll();
429 my_h5comp->dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
430 my_h5comp->dataset.read(dest + run_offset, define_mem_type<CachedValue_>(), my_h5comp->memspace, my_h5comp->dataspace);
// Start a new run.  NOTE(review): the reset of run_length (orig.
// line 433) is not visible in this excerpt -- confirm it exists.
432 run_chunk_id = current_chunk_id;
434 run_offset = total_used_offset;
437 run_length += tatami_chunked::get_chunk_length(my_dim_stats, current_chunk_id);
438 current_chunk.second->offset = total_used_offset;
439 total_used_offset += my_slab_size;
440 last_chunk_id = current_chunk.first;
// Flush the final run.
443 count[0] = run_length;
444 offset[0] = run_chunk_id * my_dim_stats.chunk_length;
445 my_h5comp->memspace.setExtentSimple(2, count);
446 my_h5comp->memspace.selectAll();
447 my_h5comp->dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
448 my_h5comp->dataset.read(dest + run_offset, define_mem_type<CachedValue_>(), my_h5comp->memspace, my_h5comp->dataspace);
// Copy the requested row out of its (target-major) cached slab.
453 auto ptr = my_memory_pool.data() + info.first->offset + sanisizer::product_unsafe<std::size_t>(block_length, info.second);
454 std::copy_n(ptr, block_length, buffer);
// Transpose each freshly-read slab from non-target-major (HDF5 file order)
// into target-major order, using a single spare buffer: the transposed copy
// lands in the spare, then pointers are swapped so the slab owns the result.
// Returns the (possibly different) pointer that is now the spare buffer.
459template<
typename Index_,
class Slab_,
typename CachedValue_>
460CachedValue_* transpose_chunks(
461 std::vector<std::pair<Index_, Slab_*> >& chunks,
462 const tatami_chunked::ChunkDimensionStats<Index_>& dim_stats,
463 const Index_ non_target_length,
464 CachedValue_* transposition_buffer_ptr
// A 1-wide (or empty) non-target dimension is identical in both layouts.
466 if (non_target_length > 1) {
467 for (
const auto& current_chunk : chunks) {
468 const Index_ chunk_length = tatami_chunked::get_chunk_length(dim_stats, current_chunk.first);
// Likewise, a 1-long chunk needs no transposition.
469 if (chunk_length <= 1) {
475 auto& dest = current_chunk.second->data;
476 tatami::transpose(dest, non_target_length, chunk_length, transposition_buffer_ptr);
// Swap instead of copying the transposed data back.
477 std::swap(dest, transposition_buffer_ptr);
481 return transposition_buffer_ptr;
// Oracle-driven core for block extraction when HDF5 rows are NOT the target:
// each chunk is read in file order and then transposed into target-major
// order via transpose_chunks().
484template<
typename Index_,
typename CachedValue_>
485class OracularBlockTransposed {
487 OracularBlockTransposed(
488 const std::string& file_name,
489 const std::string& dataset_name,
490 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
492 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
493 [[maybe_unused]]
const bool h5_row_is_target,
494 [[maybe_unused]]
const Index_ non_target_range_for_indexed
496 my_dim_stats(std::move(target_dim_stats)),
497 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache),
498 my_slab_size(slab_stats.slab_size_in_elements),
499 my_memory_pool(sanisizer::product<decltype(my_memory_pool.size())>(slab_stats.max_slabs_in_cache, my_slab_size))
501 initialize(file_name, dataset_name, my_h5comp);
// The transposition always happens in this class, so the spare buffer is
// allocated unconditionally.
502 sanisizer::resize(my_transposition_buffer, my_slab_size);
503 my_transposition_buffer_ptr = my_transposition_buffer.data();
506 ~OracularBlockTransposed() {
511 std::unique_ptr<Components> my_h5comp;
512 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
518 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, false> my_cache;
519 std::size_t my_slab_size;
520 std::vector<CachedValue_> my_memory_pool;
521 std::size_t my_offset = 0;
// NOTE(review): this member is never initialized by the visible constructor
// and never read in the visible methods -- looks dead; confirm and remove.
523 bool my_h5_row_is_target;
524 std::vector<CachedValue_> my_transposition_buffer;
525 CachedValue_* my_transposition_buffer_ptr;
528 template<
typename Value_>
529 const Value_* fetch_block([[maybe_unused]] Index_ i, Index_ block_start, Index_ block_length, Value_* buffer) {
530 auto info = my_cache.next(
531 [&](Index_ current) -> std::pair<Index_, Index_> {
532 return std::pair<Index_, Index_>(current / my_dim_stats.chunk_length, current % my_dim_stats.chunk_length);
// Slabs carry direct pointers into the flat pool (no offset shuffling
// here, unlike OracularBlockNormal).
536 output.data = my_memory_pool.data() + my_offset;
537 my_offset += my_slab_size;
540 [&](std::vector<std::pair<Index_, Slab*> >& chunks) ->
void {
// Target on HDF5 columns: dim 0 is the block, dim 1 the chunk run.
545 count[0] = block_length;
546 offset[0] = block_start;
549 for (
const auto& current_chunk : chunks) {
550 const auto chunk_length = tatami_chunked::get_chunk_length(my_dim_stats, current_chunk.first);
551 count[1] = chunk_length;
552 offset[1] = current_chunk.first * my_dim_stats.chunk_length;
553 my_h5comp->memspace.setExtentSimple(2, count);
554 my_h5comp->memspace.selectAll();
555 my_h5comp->dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
556 my_h5comp->dataset.read(current_chunk.second->data, define_mem_type<CachedValue_>(), my_h5comp->memspace, my_h5comp->dataspace);
// Convert all freshly-read slabs to target-major order.
561 my_transposition_buffer_ptr = transpose_chunks(chunks, my_dim_stats, block_length, my_transposition_buffer_ptr);
565 auto ptr = info.first->data + sanisizer::product_unsafe<std::size_t>(block_length, info.second);
566 std::copy_n(ptr, block_length, buffer);
// Oracle-driven core for indexed extraction, handling both target
// orientations (transposing after the read when rows are not the target).
571template<
typename Index_,
typename CachedValue_>
572class OracularIndexed {
575 const std::string& file_name,
576 const std::string& dataset_name,
577 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
579 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
580 const bool h5_row_is_target,
581 const Index_ non_target_range_for_indexed
583 my_dim_stats(std::move(target_dim_stats)),
584 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache),
585 my_slab_size(slab_stats.slab_size_in_elements),
586 my_memory_pool(sanisizer::product<decltype(my_memory_pool.size())>(slab_stats.max_slabs_in_cache, my_slab_size)),
587 my_h5_row_is_target(h5_row_is_target),
// Scratch strip for extract_indices(): contiguous index range x chunk.
588 my_non_target_buffer_for_indexed(sanisizer::product<decltype(my_non_target_buffer_for_indexed.size())>(non_target_range_for_indexed, my_dim_stats.chunk_length))
590 initialize(file_name, dataset_name, my_h5comp);
// The transposition buffer is only needed (and only initialized) when
// rows are not the target; fetch_indices() guards its use the same way.
591 if (!my_h5_row_is_target) {
592 sanisizer::resize(my_transposition_buffer, my_slab_size);
593 my_transposition_buffer_ptr = my_transposition_buffer.data();
602 std::unique_ptr<Components> my_h5comp;
603 tatami_chunked::ChunkDimensionStats<Index_> my_dim_stats;
609 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, false> my_cache;
610 std::size_t my_slab_size;
611 std::vector<CachedValue_> my_memory_pool;
612 std::size_t my_offset = 0;
614 bool my_h5_row_is_target;
615 std::vector<CachedValue_> my_transposition_buffer;
616 CachedValue_* my_transposition_buffer_ptr;
618 std::vector<CachedValue_> my_non_target_buffer_for_indexed;
621 template<
typename Value_>
622 const Value_* fetch_indices([[maybe_unused]] Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
623 const Index_ num_indices = indices.size();
624 auto info = my_cache.next(
625 [&](Index_ current) -> std::pair<Index_, Index_> {
626 return std::pair<Index_, Index_>(current / my_dim_stats.chunk_length, current % my_dim_stats.chunk_length);
630 output.data = my_memory_pool.data() + my_offset;
631 my_offset += my_slab_size;
634 [&](std::vector<std::pair<Index_, Slab*> >& chunks) ->
void {
// Populate each predicted chunk via extract_indices (the call's name
// and remaining arguments fall in non-visible lines of this excerpt).
636 for (
const auto& current_chunk : chunks) {
639 current_chunk.first * my_dim_stats.chunk_length,
640 tatami_chunked::get_chunk_length(my_dim_stats, current_chunk.first),
642 my_non_target_buffer_for_indexed.data(),
643 current_chunk.second->data,
// Only transpose when the data arrived non-target-major.
650 if (!my_h5_row_is_target) {
651 my_transposition_buffer_ptr = transpose_chunks(chunks, my_dim_stats, num_indices, my_transposition_buffer_ptr);
656 auto ptr = info.first->data + sanisizer::product_unsafe<std::size_t>(num_indices, info.second);
657 std::copy_n(ptr, num_indices, buffer);
// Strategy tag selecting the oracle-aware core implementation:
// NOT_APPLICABLE   -- no oracle-specific core (solo/myopic paths).
// BLOCK_NORMAL     -- block extraction, HDF5 rows are the target.
// BLOCK_TRANSPOSED -- block extraction, columns are the target (transpose).
// INDEXED          -- arbitrary-index extraction.
// (Repaired: an extraction artifact had fused a line number into the
// declaration, breaking the enum.)
enum class OracularMode : char { NOT_APPLICABLE, BLOCK_NORMAL, BLOCK_TRANSPOSED, INDEXED };
// Compile-time dispatch to the concrete core: solo (no cache) beats
// everything; otherwise myopic (LRU) without an oracle; otherwise one of the
// three oracle-aware cores chosen by 'omode_'.  (Trailing ::type closers are
// outside the visible excerpt.)
682template<
bool solo_,
bool oracle_, OracularMode omode_,
typename Index_,
typename CachedValue_>
683using DenseCore =
typename std::conditional<solo_,
684 SoloCore<oracle_, Index_, CachedValue_>,
685 typename std::conditional<!oracle_,
686 MyopicCore<Index_, CachedValue_>,
687 typename std::conditional<omode_ == OracularMode::BLOCK_NORMAL,
688 OracularBlockNormal<Index_, CachedValue_>,
689 typename std::conditional<omode_ == OracularMode::BLOCK_TRANSPOSED,
690 OracularBlockTransposed<Index_, CachedValue_>,
691 OracularIndexed<Index_, CachedValue_>
// Extractor for the full non-target dimension: implemented as a block fetch
// spanning [0, my_non_target_dim).
697template<
bool solo_,
bool oracle_, OracularMode omode_,
typename Value_,
typename Index_,
typename CachedValue_>
701 const std::string& file_name,
702 const std::string& dataset_name,
703 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
705 Index_ non_target_dim,
706 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
707 bool h5_row_is_target
712 std::move(target_dim_stats),
718 my_non_target_dim(non_target_dim)
721 const Value_* fetch(Index_ i, Value_* buffer) {
722 return my_core.fetch_block(i, 0, my_non_target_dim, buffer);
726 DenseCore<solo_, oracle_, omode_, Index_, CachedValue_> my_core;
727 Index_ my_non_target_dim;
// Extractor for a contiguous block [block_start, block_start + block_length)
// of the non-target dimension.
730template<
bool solo_,
bool oracle_, OracularMode omode_,
typename Value_,
typename Index_,
typename CachedValue_>
734 const std::string& file_name,
735 const std::string& dataset_name,
736 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
740 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
741 const bool h5_row_is_target
746 std::move(target_dim_stats),
752 my_block_start(block_start),
753 my_block_length(block_length)
756 const Value_* fetch(Index_ i, Value_* buffer) {
757 return my_core.fetch_block(i, my_block_start, my_block_length, buffer);
761 DenseCore<solo_, oracle_, omode_, Index_, CachedValue_> my_core;
762 Index_ my_block_start, my_block_length;
// Extractor for an arbitrary (sorted) subset of the non-target dimension.
765template<
bool solo_,
bool oracle_, OracularMode omode_,
typename Value_,
typename Index_,
typename CachedValue_>
769 const std::string& file_name,
770 const std::string& dataset_name,
771 tatami_chunked::ChunkDimensionStats<Index_> target_dim_stats,
774 const tatami_chunked::SlabCacheStats<Index_>& slab_stats,
775 const bool h5_row_is_target
780 std::move(target_dim_stats),
// The core sizes its scratch buffer to the contiguous span of the indices;
// an empty index vector needs no scratch at all.
784 indices_ptr->empty() ? 0 : (indices_ptr->back() - indices_ptr->front() + 1)
786 my_indices_ptr(std::move(indices_ptr))
789 const Value_* fetch(Index_ i, Value_* buffer) {
790 return my_core.fetch_indices(i, *my_indices_ptr, buffer);
794 DenseCore<solo_, oracle_, omode_, Index_, CachedValue_> my_core;
// Public matrix class: members plus the dataset-inspecting constructor.
// 'my_transpose' flips the user-facing row/column orientation relative to
// the on-disk layout.
827template<
typename Value_,
typename Index_,
typename CachedValue_ = Value_>
829 std::string my_file_name, my_dataset_name;
// Cache budget expressed in elements of CachedValue_, derived from the
// byte budget in the options.
832 std::size_t my_cache_size_in_elements;
833 bool my_require_minimum_cache;
835 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
836 bool my_prefer_firstdim;
848 my_file_name(std::move(file)),
849 my_dataset_name(std::move(name)),
850 my_transpose(transpose),
851 my_cache_size_in_elements(options.maximum_cache_size / sizeof(CachedValue_)),
852 my_require_minimum_cache(options.require_minimum_cache)
// Inspect dimensions and chunking up front; the handle opened here is
// only used during construction.
855 H5::H5File fhandle(my_file_name, H5F_ACC_RDONLY);
856 auto dhandle = open_and_check_dataset<false>(fhandle, my_dataset_name);
857 auto dims = get_array_dimensions<2>(dhandle, my_dataset_name);
859 hsize_t chunk_dims[2];
860 auto dparms = dhandle.getCreatePlist();
// Contiguous (non-chunked) layout: treat the whole dataset as one chunk.
861 if (dparms.getLayout() != H5D_CHUNKED) {
864 chunk_dims[1] = dims[1];
866 dparms.getChunk(2, chunk_dims);
869 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(
870 sanisizer::cast<Index_>(dims[0]),
871 sanisizer::cast<Index_>(chunk_dims[0])
873 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(
874 sanisizer::cast<Index_>(dims[1]),
875 sanisizer::cast<Index_>(chunk_dims[1])
// Prefer iterating along the dimension with more chunks, i.e. the one
// whose chunks are cheaper to load per iteration.
884 my_prefer_firstdim = (my_firstdim_stats.num_chunks > my_seconddim_stats.num_chunks);
// Internal accessors translate between the on-disk orientation and the
// user-facing one: when my_transpose is set, rows/columns swap meaning.
897 bool prefer_rows_internal()
const {
899 return !my_prefer_firstdim;
901 return my_prefer_firstdim;
905 Index_ nrow_internal()
const {
907 return my_seconddim_stats.dimension_extent;
909 return my_firstdim_stats.dimension_extent;
913 Index_ ncol_internal()
const {
915 return my_firstdim_stats.dimension_extent;
917 return my_seconddim_stats.dimension_extent;
// tatami::Matrix interface: thin forwards to the internal helpers.
922 Index_ nrow()
const {
923 return nrow_internal();
926 Index_ ncol()
const {
927 return ncol_internal();
938 return prefer_rows_internal();
941 double prefer_rows_proportion()
const {
942 return static_cast<double>(prefer_rows_internal());
945 bool uses_oracle(
bool)
const {
// Dense HDF5 data is never sparse; these report the fixed answers.
949 bool is_sparse()
const {
953 double is_sparse_proportion()
const {
// Build the right extractor for one request.  Chooses: solo (cache too
// small), then for oracular access one of INDEXED / BLOCK_NORMAL /
// BLOCK_TRANSPOSED depending on the extractor type and orientation, and
// NOT_APPLICABLE for the myopic path.
962 template<
bool oracle_,
template<
bool,
bool, DenseMatrix_
internal::OracularMode,
typename,
typename,
typename>
class Extractor_,
typename ... Args_>
963 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate(
965 Index_ non_target_length,
// XOR of requested orientation with the transpose flag gives the actual
// on-disk target dimension.
969 bool by_h5_row = (row != my_transpose);
970 const auto& dim_stats = (by_h5_row ? my_firstdim_stats : my_seconddim_stats);
972 tatami_chunked::SlabCacheStats<Index_> slab_stats(
973 dim_stats.chunk_length,
975 dim_stats.num_chunks,
976 my_cache_size_in_elements,
977 my_require_minimum_cache
// Cache cannot hold even one slab: fall back to the solo (uncached) core.
980 if (slab_stats.max_slabs_in_cache == 0) {
981 return std::make_unique<Extractor_<true, oracle_, DenseMatrix_internal::OracularMode::NOT_APPLICABLE, Value_, Index_, CachedValue_> >(
982 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
986 if constexpr(oracle_) {
// Detect at compile time whether this request is an Index extraction.
987 typedef DenseMatrix_internal::Index<false, oracle_, DenseMatrix_internal::OracularMode::INDEXED, Value_, Index_, CachedValue_> CurIndex;
988 if constexpr(std::is_same<CurIndex, Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::INDEXED, Value_, Index_, CachedValue_> >::value) {
989 return std::make_unique<CurIndex>(
990 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
// Block/full extraction: pick the core matching the orientation.
994 return std::make_unique<Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::BLOCK_NORMAL, Value_, Index_, CachedValue_> >(
995 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
998 return std::make_unique<Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::BLOCK_TRANSPOSED, Value_, Index_, CachedValue_> >(
999 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
// Myopic path: the oracular mode is irrelevant.
1004 return std::make_unique<Extractor_<false, oracle_, DenseMatrix_internal::OracularMode::NOT_APPLICABLE, Value_, Index_, CachedValue_> >(
1005 my_file_name, my_dataset_name, dim_stats, std::move(oracle), std::forward<Args_>(args)..., slab_stats, by_h5_row
// tatami::Matrix extractor factories.  Dense variants delegate to
// populate(); sparse variants wrap the dense extractor in the corresponding
// tatami Sparsified wrapper, since dense HDF5 data has no sparse form.
1014 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options&)
const {
1015 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
// 'false' stands in for the absent oracle in the myopic overloads.
1016 return populate<false, DenseMatrix_internal::Full>(row, full_non_target,
false, full_non_target);
1019 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options&)
const {
1020 return populate<false, DenseMatrix_internal::Block>(row, block_length,
false, block_start, block_length);
1024 auto nidx = indices_ptr->size();
1025 return populate<false, DenseMatrix_internal::Index>(row, nidx,
false, std::move(indices_ptr));
1032 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
1033 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1034 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), full_non_target, opt);
1037 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1038 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
1042 auto ptr = dense(row, indices_ptr, opt);
1043 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
// Oracular overloads: same structure, but the oracle is forwarded through.
1050 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1055 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1056 return populate<true, DenseMatrix_internal::Full>(row, full_non_target, std::move(oracle), full_non_target);
1059 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1063 Index_ block_length,
1066 return populate<true, DenseMatrix_internal::Block>(row, block_length, std::move(oracle), block_start, block_length);
1069 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1075 auto nidx = indices_ptr->size();
1076 return populate<true, DenseMatrix_internal::Index>(row, nidx, std::move(oracle), std::move(indices_ptr));
1083 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1088 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1089 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), opt), full_non_target, opt);
1092 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1096 Index_ block_length,
1099 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), block_start, block_length, opt), block_start, block_length, opt);
1102 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1108 auto ptr = dense(row, std::move(oracle), indices_ptr, opt);
1109 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
Dense matrix backed by a DataSet in an HDF5 file.
Definition DenseMatrix.hpp:828
DenseMatrix(std::string file, std::string name, bool transpose)
Definition DenseMatrix.hpp:893
bool prefer_rows() const
Definition DenseMatrix.hpp:937
DenseMatrix(std::string file, std::string name, bool transpose, const DenseMatrixOptions &options)
Definition DenseMatrix.hpp:847
Representations for matrix data in HDF5 files.
Definition CompressedSparseMatrix.hpp:24
void serialize(Function_ f)
Definition serialize.hpp:53
std::shared_ptr< const std::vector< Index_ > > VectorPtr
void transpose(const Input_ *const input, const std::size_t nrow, const std::size_t ncol, const std::size_t input_stride, Output_ *const output, const std::size_t output_stride)
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
Default locking for serial access.
Options for DenseMatrix extraction.
Definition DenseMatrix.hpp:27
std::size_t maximum_cache_size
Definition DenseMatrix.hpp:37
bool require_minimum_cache
Definition DenseMatrix.hpp:44
Utilities for HDF5 extraction.