1#ifndef TATAMI_CHUNKED_CUSTOM_SPARSE_CHUNKED_MATRIX_HPP
2#define TATAMI_CHUNKED_CUSTOM_SPARSE_CHUNKED_MATRIX_HPP
5#include "custom_internals.hpp"
15#include "sanisizer/sanisizer.hpp"
54template<
typename ChunkValue_,
typename Index_>
112 Index_ chunk_column_id,
115 Index_ target_length,
116 Index_ non_target_start,
117 Index_ non_target_length,
118 const std::vector<ChunkValue_*>& output_values,
119 const std::vector<Index_*>& output_indices,
120 Index_* output_number,
164 Index_ chunk_column_id,
167 Index_ target_length,
168 const std::vector<Index_>& non_target_indices,
169 const std::vector<ChunkValue_*>& output_values,
170 const std::vector<Index_*>& output_indices,
171 Index_* output_number,
214 Index_ chunk_column_id,
216 const std::vector<Index_>& target_indices,
217 Index_ non_target_start,
218 Index_ non_target_length,
219 const std::vector<ChunkValue_*>& output_values,
220 const std::vector<Index_*>& output_indices,
221 Index_* output_number,
263 Index_ chunk_column_id,
265 const std::vector<Index_>& target_indices,
266 const std::vector<Index_>& non_target_indices,
267 const std::vector<ChunkValue_*>& output_values,
268 const std::vector<Index_*>& output_indices,
269 Index_* output_number,
279template<
typename ChunkValue_,
typename Index_>
298 virtual std::unique_ptr<CustomSparseChunkedMatrixWorkspace<ChunkValue_, Index_> >
new_workspace()
const = 0;
334namespace CustomChunkedMatrix_internal {
340template<
bool oracle_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
341class SoloSparseCore {
342 WorkspacePtr_ my_chunk_workspace;
343 const ChunkCoordinator<true, ChunkValue_, Index_>& my_coordinator;
346 typename std::conditional<oracle_, tatami::PredictionIndex, bool>::type my_counter = 0;
349 typedef typename decltype(my_factory)::Slab Slab;
356 SparseSingleWorkspace<ChunkValue_, Index_> my_tmp_solo;
361 WorkspacePtr_ chunk_workspace,
362 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
366 Index_ non_target_length,
370 my_chunk_workspace(std::move(chunk_workspace)),
371 my_coordinator(coordinator),
372 my_oracle(std::move(oracle)),
373 my_factory(1, non_target_length, 1, needs_value, needs_index),
375 my_coordinator.get_target_chunkdim(row),
376 my_coordinator.get_non_target_chunkdim(row),
380 my_final_solo(my_factory.
create())
383 template<
typename ... Args_>
384 std::pair<const Slab*, Index_> fetch_raw(Index_ i,
bool row, Args_&& ... args) {
385 if constexpr(oracle_) {
386 i = my_oracle->get(my_counter++);
388 return my_coordinator.fetch_single(row, i, std::forward<Args_>(args)..., *my_chunk_workspace, my_tmp_solo, my_final_solo);
392template<
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
393class MyopicSparseCore {
394 WorkspacePtr_ my_chunk_workspace;
395 const ChunkCoordinator<true, ChunkValue_, Index_>& my_coordinator;
397 SparseSlabFactory<ChunkValue_, Index_, Index_> my_factory;
398 typedef typename decltype(my_factory)::Slab Slab;
400 LruSlabCache<Index_, Slab> my_cache;
404 WorkspacePtr_ chunk_workspace,
405 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
406 const SlabCacheStats<Index_>& slab_stats,
409 Index_ non_target_length,
413 my_chunk_workspace(std::move(chunk_workspace)),
414 my_coordinator(coordinator),
415 my_factory(coordinator.get_target_chunkdim(row), non_target_length, slab_stats, needs_value, needs_index),
416 my_cache(slab_stats.max_slabs_in_cache)
419 template<
typename ... Args_>
420 std::pair<const Slab*, Index_> fetch_raw(Index_ i,
bool row, Args_&& ... args) {
421 return my_coordinator.fetch_myopic(row, i, std::forward<Args_>(args)..., *my_chunk_workspace, my_cache, my_factory);
425template<
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
426class OracularSparseCore {
428 WorkspacePtr_ my_chunk_workspace;
429 const ChunkCoordinator<true, ChunkValue_, Index_>& my_coordinator;
431 SparseSlabFactory<ChunkValue_, Index_, Index_> my_factory;
432 typedef typename decltype(my_factory)::Slab Slab;
434 typename std::conditional<use_subset_, OracularSubsettedSlabCache<Index_, Index_, Slab>, OracularSlabCache<Index_, Index_, Slab> >::type my_cache;
438 WorkspacePtr_ chunk_workspace,
439 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
440 const SlabCacheStats<Index_>& slab_stats,
443 Index_ non_target_length,
447 my_chunk_workspace(std::move(chunk_workspace)),
448 my_coordinator(coordinator),
449 my_factory(coordinator.get_target_chunkdim(row), non_target_length, slab_stats, needs_value, needs_index),
450 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache)
453 template<
typename ... Args_>
454 std::pair<const Slab*, Index_> fetch_raw([[maybe_unused]] Index_ i,
bool row, Args_&& ... args) {
455 if constexpr(use_subset_) {
456 return my_coordinator.fetch_oracular_subsetted(row, std::forward<Args_>(args)..., *my_chunk_workspace, my_cache, my_factory);
458 return my_coordinator.fetch_oracular(row, std::forward<Args_>(args)..., *my_chunk_workspace, my_cache, my_factory);
463template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
464using SparseCore =
typename std::conditional<solo_,
465 SoloSparseCore<oracle_, Value_, Index_, ChunkValue_, WorkspacePtr_>,
466 typename std::conditional<oracle_,
467 OracularSparseCore<use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_>,
468 MyopicSparseCore<Value_, Index_, ChunkValue_, WorkspacePtr_>
476template<
class Slab_,
typename Index_,
typename Value_>
477tatami::SparseRange<Value_, Index_> process_sparse_slab(
const std::pair<const Slab_*, Index_>& fetched, Value_* value_buffer, Index_* index_buffer,
bool needs_value,
bool needs_index) {
478 auto num = fetched.first->
number[fetched.second];
481 auto vptr = fetched.first->values[fetched.second];
482 std::copy_n(vptr, num, value_buffer);
488 auto iptr = fetched.first->indices[fetched.second];
489 std::copy_n(iptr, num, index_buffer);
497template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
501 WorkspacePtr_ chunk_workspace,
502 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
503 const SlabCacheStats<Index_>& slab_stats,
509 my_non_target_dim(coordinator.get_non_target_dim(row)),
510 my_needs_value(opt.sparse_extract_value),
511 my_needs_index(opt.sparse_extract_index),
513 std::move(chunk_workspace),
519 opt.sparse_extract_value,
520 opt.sparse_extract_index
525 auto fetched = my_core.fetch_raw(i, my_row, 0, my_non_target_dim);
526 return process_sparse_slab(fetched, value_buffer, index_buffer, my_needs_value, my_needs_index);
531 Index_ my_non_target_dim;
532 bool my_needs_value, my_needs_index;
533 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
536template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
540 WorkspacePtr_ chunk_workspace,
541 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
542 const SlabCacheStats<Index_>& slab_stats,
550 my_block_start(block_start),
551 my_block_length(block_length),
552 my_needs_value(opt.sparse_extract_value),
553 my_needs_index(opt.sparse_extract_index),
555 std::move(chunk_workspace),
567 auto fetched = my_core.fetch_raw(i, my_row, my_block_start, my_block_length);
568 return process_sparse_slab(fetched, value_buffer, index_buffer, my_needs_value, my_needs_index);
573 Index_ my_block_start, my_block_length;
574 bool my_needs_value, my_needs_index;
575 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
578template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
582 WorkspacePtr_ chunk_workspace,
583 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
584 const SlabCacheStats<Index_>& slab_stats,
591 my_indices_ptr(std::move(indices_ptr)),
592 my_needs_value(opt.sparse_extract_value),
593 my_needs_index(opt.sparse_extract_index),
595 std::move(chunk_workspace),
600 my_indices_ptr->size(),
607 auto fetched = my_core.fetch_raw(i, my_row, *my_indices_ptr, my_tmp_indices);
608 return process_sparse_slab(fetched, value_buffer, index_buffer, my_needs_value, my_needs_index);
614 std::vector<Index_> my_tmp_indices;
615 bool my_needs_value, my_needs_index;
616 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
623template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
627 WorkspacePtr_ chunk_workspace,
628 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
629 const SlabCacheStats<Index_>& slab_stats,
635 my_non_target_dim(coordinator.get_non_target_dim(row)),
637 std::move(chunk_workspace),
648 const Value_* fetch(Index_ i, Value_* buffer) {
649 auto contents = my_core.fetch_raw(i, my_row, 0, my_non_target_dim);
651 Index_ num = contents.first->number[contents.second];
652 auto vptr = contents.first->values[contents.second];
653 auto iptr = contents.first->indices[contents.second];
655 std::fill_n(buffer, my_non_target_dim, 0);
656 for (Index_ x = 0; x < num; ++x, ++iptr, ++vptr) {
657 buffer[*iptr] = *vptr;
664 Index_ my_non_target_dim;
665 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
668template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
672 WorkspacePtr_ chunk_workspace,
673 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
674 const SlabCacheStats<Index_>& slab_stats,
682 my_block_start(block_start),
683 my_block_length(block_length),
685 std::move(chunk_workspace),
696 const Value_* fetch(Index_ i, Value_* buffer) {
697 auto contents = my_core.fetch_raw(i, my_row, my_block_start, my_block_length);
699 auto vptr = contents.first->values[contents.second];
700 auto iptr = contents.first->indices[contents.second];
701 auto num = contents.first->number[contents.second];
703 std::fill_n(buffer, my_block_length, 0);
704 for (Index_ x = 0; x < num; ++x, ++iptr, ++vptr) {
705 buffer[*iptr - my_block_start] = *vptr;
712 Index_ my_block_start, my_block_length;
713 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
716template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
720 WorkspacePtr_ chunk_workspace,
721 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
722 const SlabCacheStats<Index_>& slab_stats,
729 my_indices_ptr(std::move(indices_ptr)),
731 std::move(chunk_workspace),
736 my_indices_ptr->size(),
741 const auto& indices = *my_indices_ptr;
742 if (!indices.empty()) {
743 my_remap_offset = indices.front();
744 Index_ alloc = indices.back() - my_remap_offset + 1;
745 tatami::resize_container_to_Index_size(my_remap, alloc);
747 for (auto i : indices) {
748 my_remap[i - my_remap_offset] = counter;
754 const Value_* fetch(Index_ i, Value_* buffer) {
755 auto contents = my_core.fetch_raw(i, my_row, *my_indices_ptr, my_tmp_indices);
757 auto vptr = contents.first->values[contents.second];
758 auto iptr = contents.first->indices[contents.second];
759 auto num = contents.first->number[contents.second];
761 auto nidx = my_indices_ptr->size();
762 std::fill_n(buffer, nidx, 0);
763 for (Index_ x = 0; x <num; ++x, ++iptr, ++vptr) {
764 buffer[my_remap[*iptr - my_remap_offset]] = *vptr;
772 Index_ my_remap_offset = 0;
773 std::vector<Index_> my_remap;
774 std::vector<Index_> my_tmp_indices;
775 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
798template<
typename Value_,
typename Index_,
typename ChunkValue_,
class Manager_ = CustomSparseChunkedMatrixManager<ChunkValue_, Index_> >
806 my_manager(std::move(manager)),
807 my_coordinator(my_manager->row_stats(), my_manager->column_stats()),
808 my_cache_size_in_bytes(opt.maximum_cache_size),
809 my_require_minimum_cache(opt.require_minimum_cache),
810 my_cache_subset(opt.cache_subset)
814 std::shared_ptr<Manager_> my_manager;
815 CustomChunkedMatrix_internal::ChunkCoordinator<true, ChunkValue_, Index_> my_coordinator;
816 std::size_t my_cache_size_in_bytes;
817 bool my_require_minimum_cache;
818 bool my_cache_subset;
821 Index_ nrow()
const {
822 return my_coordinator.get_nrow();
825 Index_ ncol()
const {
826 return my_coordinator.get_ncol();
829 bool prefer_rows()
const {
830 return my_manager->prefer_rows();
833 bool uses_oracle(
bool)
const {
837 double prefer_rows_proportion()
const {
838 return static_cast<double>(my_manager->prefer_rows());
841 bool is_sparse()
const {
845 double is_sparse_proportion()
const {
858 template<
bool,
typename,
typename>
class Interface_,
860 template<
bool,
bool,
bool,
typename,
typename,
typename,
class>
class Extractor_,
863 std::unique_ptr<Interface_<oracle_, Value_, Index_> > raw_internal(
bool row, Index_ non_target_length,
const tatami::Options& opt, Args_&& ... args)
const {
868 return SlabCacheStats<Index_>(
869 my_coordinator.get_chunk_nrow(),
871 my_coordinator.get_num_chunks_per_column(),
872 my_cache_size_in_bytes,
874 my_require_minimum_cache
878 return SlabCacheStats<Index_>(
879 my_coordinator.get_chunk_ncol(),
881 my_coordinator.get_num_chunks_per_row(),
882 my_cache_size_in_bytes,
884 my_require_minimum_cache
889 auto wrk = my_manager->new_workspace_exact();
890 if (stats.max_slabs_in_cache == 0) {
891 return std::make_unique<Extractor_<
true, oracle_,
false, Value_, Index_, ChunkValue_,
decltype(wrk)> >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
892 }
else if constexpr(oracle_) {
893 if (my_cache_subset) {
894 return std::make_unique<Extractor_<
false,
true,
true, Value_, Index_, ChunkValue_,
decltype(wrk)> >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
896 return std::make_unique<Extractor_<
false,
true,
false, Value_, Index_, ChunkValue_,
decltype(wrk)> >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
899 return std::make_unique<Extractor_<
false,
false,
false, Value_, Index_, ChunkValue_,
decltype(wrk)> >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
904 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
905 return raw_internal<tatami::DenseExtractor, false, CustomChunkedMatrix_internal::DensifiedFull>(row, my_coordinator.get_non_target_dim(row), opt,
false, opt);
908 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
909 return raw_internal<tatami::DenseExtractor, false, CustomChunkedMatrix_internal::DensifiedBlock>(row, block_length, opt,
false, block_start, block_length, opt);
913 auto num_indices = indices_ptr->size();
914 return raw_internal<tatami::DenseExtractor, false, CustomChunkedMatrix_internal::DensifiedIndex>(row, num_indices, opt,
false, std::move(indices_ptr), opt);
921 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
926 return raw_internal<tatami::DenseExtractor, true, CustomChunkedMatrix_internal::DensifiedFull>(row, my_coordinator.get_non_target_dim(row), opt, std::move(oracle), opt);
929 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
936 return raw_internal<tatami::DenseExtractor, true, CustomChunkedMatrix_internal::DensifiedBlock>(row, block_length, opt, std::move(oracle), block_start, block_length, opt);
939 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
945 auto num_indices = indices_ptr->size();
946 return raw_internal<tatami::DenseExtractor, true, CustomChunkedMatrix_internal::DensifiedIndex>(row, num_indices, opt, std::move(oracle), std::move(indices_ptr), opt);
953 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
954 return raw_internal<tatami::SparseExtractor, false, CustomChunkedMatrix_internal::SparseFull>(row, my_coordinator.get_non_target_dim(row), opt,
false, opt);
957 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
958 return raw_internal<tatami::SparseExtractor, false, CustomChunkedMatrix_internal::SparseBlock>(row, block_length, opt,
false, block_start, block_length, opt);
962 auto num_indices = indices_ptr->size();
963 return raw_internal<tatami::SparseExtractor, false, CustomChunkedMatrix_internal::SparseIndex>(row, num_indices, opt,
false, std::move(indices_ptr), opt);
970 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
975 return raw_internal<tatami::SparseExtractor, true, CustomChunkedMatrix_internal::SparseFull>(row, my_coordinator.get_non_target_dim(row), opt, std::move(oracle), opt);
978 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
985 return raw_internal<tatami::SparseExtractor, true, CustomChunkedMatrix_internal::SparseBlock>(row, block_length, opt, std::move(oracle), block_start, block_length, opt);
988 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
994 auto num_indices = indices_ptr->size();
995 return raw_internal<tatami::SparseExtractor, true, CustomChunkedMatrix_internal::SparseIndex>(row, num_indices, opt, std::move(oracle), std::move(indices_ptr), opt);
Create an LRU cache of slabs.
Create an oracle-aware cache for slabs.
Create an oracle-aware cache with subsets.
Factory for sparse slabs.
Manager of chunks for a CustomSparseChunkedMatrix.
Definition CustomSparseChunkedMatrix.hpp:280
std::unique_ptr< CustomSparseChunkedMatrixWorkspace< ChunkValue_, Index_ > > new_workspace_exact() const
Definition CustomSparseChunkedMatrix.hpp:307
virtual const ChunkDimensionStats< Index_ > & column_stats() const =0
virtual const ChunkDimensionStats< Index_ > & row_stats() const =0
virtual std::unique_ptr< CustomSparseChunkedMatrixWorkspace< ChunkValue_, Index_ > > new_workspace() const =0
virtual bool prefer_rows() const =0
Workspace for extracting data from a CustomSparseChunkedMatrixManager.
Definition CustomSparseChunkedMatrix.hpp:55
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, Index_ target_start, Index_ target_length, const std::vector< Index_ > &non_target_indices, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, const std::vector< Index_ > &target_indices, Index_ non_target_start, Index_ non_target_length, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, const std::vector< Index_ > &target_indices, const std::vector< Index_ > &non_target_indices, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, Index_ target_start, Index_ target_length, Index_ non_target_start, Index_ non_target_length, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
Matrix of custom sparse chunks.
Definition CustomSparseChunkedMatrix.hpp:799
CustomSparseChunkedMatrix(std::shared_ptr< Manager_ > manager, const CustomSparseChunkedMatrixOptions &opt)
Definition CustomSparseChunkedMatrix.hpp:805
Factory for sparse slabs.
Definition SparseSlabFactory.hpp:31
Slab create()
Definition SparseSlabFactory.hpp:161
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:4
std::shared_ptr< const std::vector< Index_ > > VectorPtr
typename std::conditional< oracle_, OracularSparseExtractor< Value_, Index_ >, MyopicSparseExtractor< Value_, Index_ > >::type SparseExtractor
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
bool sparse_extract_index
bool sparse_extract_value
Statistics for regular chunks along a dimension.
Definition ChunkDimensionStats.hpp:35
Options for data extraction from a CustomSparseChunkedMatrix.
Definition CustomSparseChunkedMatrix.hpp:27
std::size_t maximum_cache_size
Definition CustomSparseChunkedMatrix.hpp:33
bool cache_subset
Definition CustomSparseChunkedMatrix.hpp:46
bool require_minimum_cache
Definition CustomSparseChunkedMatrix.hpp:40
Statistics for slab caching.
Definition SlabCacheStats.hpp:26