1#ifndef TATAMI_CHUNKED_CUSTOM_SPARSE_CHUNKED_MATRIX_HPP
2#define TATAMI_CHUNKED_CUSTOM_SPARSE_CHUNKED_MATRIX_HPP
4#include "custom_internals.hpp"
16#include "sanisizer/sanisizer.hpp"
55template<
typename ChunkValue_,
typename Index_>
113 Index_ chunk_column_id,
116 Index_ target_length,
117 Index_ non_target_start,
118 Index_ non_target_length,
119 const std::vector<ChunkValue_*>& output_values,
120 const std::vector<Index_*>& output_indices,
121 Index_* output_number,
165 Index_ chunk_column_id,
168 Index_ target_length,
169 const std::vector<Index_>& non_target_indices,
170 const std::vector<ChunkValue_*>& output_values,
171 const std::vector<Index_*>& output_indices,
172 Index_* output_number,
215 Index_ chunk_column_id,
217 const std::vector<Index_>& target_indices,
218 Index_ non_target_start,
219 Index_ non_target_length,
220 const std::vector<ChunkValue_*>& output_values,
221 const std::vector<Index_*>& output_indices,
222 Index_* output_number,
264 Index_ chunk_column_id,
266 const std::vector<Index_>& target_indices,
267 const std::vector<Index_>& non_target_indices,
268 const std::vector<ChunkValue_*>& output_values,
269 const std::vector<Index_*>& output_indices,
270 Index_* output_number,
280template<
typename ChunkValue_,
typename Index_>
299 virtual std::unique_ptr<CustomSparseChunkedMatrixWorkspace<ChunkValue_, Index_> >
new_workspace()
const = 0;
335namespace CustomChunkedMatrix_internal {
341template<
bool oracle_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
342class SoloSparseCore {
343 WorkspacePtr_ my_chunk_workspace;
344 const ChunkCoordinator<true, ChunkValue_, Index_>& my_coordinator;
347 typename std::conditional<oracle_, tatami::PredictionIndex, bool>::type my_counter = 0;
350 typedef typename I<
decltype(my_factory)>::Slab Slab;
357 SparseSingleWorkspace<ChunkValue_, Index_> my_tmp_solo;
362 WorkspacePtr_ chunk_workspace,
363 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
367 Index_ non_target_length,
371 my_chunk_workspace(std::move(chunk_workspace)),
372 my_coordinator(coordinator),
373 my_oracle(std::move(oracle)),
374 my_factory(1, non_target_length, 1, needs_value, needs_index),
376 my_coordinator.get_target_chunkdim(row),
377 my_coordinator.get_non_target_chunkdim(row),
381 my_final_solo(my_factory.
create())
384 template<
typename ... Args_>
385 std::pair<const Slab*, Index_> fetch_raw(Index_ i,
bool row, Args_&& ... args) {
386 if constexpr(oracle_) {
387 i = my_oracle->get(my_counter++);
389 return my_coordinator.fetch_single(row, i, std::forward<Args_>(args)..., *my_chunk_workspace, my_tmp_solo, my_final_solo);
393template<
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
394class MyopicSparseCore {
395 WorkspacePtr_ my_chunk_workspace;
396 const ChunkCoordinator<true, ChunkValue_, Index_>& my_coordinator;
398 SparseSlabFactory<ChunkValue_, Index_, Index_> my_factory;
399 typedef typename I<
decltype(my_factory)>::Slab Slab;
401 LruSlabCache<Index_, Slab> my_cache;
405 WorkspacePtr_ chunk_workspace,
406 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
407 const SlabCacheStats<Index_>& slab_stats,
410 Index_ non_target_length,
414 my_chunk_workspace(std::move(chunk_workspace)),
415 my_coordinator(coordinator),
416 my_factory(coordinator.get_target_chunkdim(row), non_target_length, slab_stats, needs_value, needs_index),
417 my_cache(slab_stats.max_slabs_in_cache)
420 template<
typename ... Args_>
421 std::pair<const Slab*, Index_> fetch_raw(Index_ i,
bool row, Args_&& ... args) {
422 return my_coordinator.fetch_myopic(row, i, std::forward<Args_>(args)..., *my_chunk_workspace, my_cache, my_factory);
426template<
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
427class OracularSparseCore {
429 WorkspacePtr_ my_chunk_workspace;
430 const ChunkCoordinator<true, ChunkValue_, Index_>& my_coordinator;
432 SparseSlabFactory<ChunkValue_, Index_, Index_> my_factory;
433 typedef typename I<
decltype(my_factory)>::Slab Slab;
435 typename std::conditional<use_subset_, OracularSubsettedSlabCache<Index_, Index_, Slab>, OracularSlabCache<Index_, Index_, Slab> >::type my_cache;
439 WorkspacePtr_ chunk_workspace,
440 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
441 const SlabCacheStats<Index_>& slab_stats,
444 Index_ non_target_length,
448 my_chunk_workspace(std::move(chunk_workspace)),
449 my_coordinator(coordinator),
450 my_factory(coordinator.get_target_chunkdim(row), non_target_length, slab_stats, needs_value, needs_index),
451 my_cache(std::move(oracle), slab_stats.max_slabs_in_cache)
454 template<
typename ... Args_>
455 std::pair<const Slab*, Index_> fetch_raw([[maybe_unused]] Index_ i,
bool row, Args_&& ... args) {
456 if constexpr(use_subset_) {
457 return my_coordinator.fetch_oracular_subsetted(row, std::forward<Args_>(args)..., *my_chunk_workspace, my_cache, my_factory);
459 return my_coordinator.fetch_oracular(row, std::forward<Args_>(args)..., *my_chunk_workspace, my_cache, my_factory);
464template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
465using SparseCore =
typename std::conditional<solo_,
466 SoloSparseCore<oracle_, Value_, Index_, ChunkValue_, WorkspacePtr_>,
467 typename std::conditional<oracle_,
468 OracularSparseCore<use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_>,
469 MyopicSparseCore<Value_, Index_, ChunkValue_, WorkspacePtr_>
477template<
class Slab_,
typename Index_,
typename Value_>
478tatami::SparseRange<Value_, Index_> process_sparse_slab(
const std::pair<const Slab_*, Index_>& fetched, Value_* value_buffer, Index_* index_buffer,
bool needs_value,
bool needs_index) {
479 auto num = fetched.first->
number[fetched.second];
482 auto vptr = fetched.first->values[fetched.second];
483 std::copy_n(vptr, num, value_buffer);
489 auto iptr = fetched.first->indices[fetched.second];
490 std::copy_n(iptr, num, index_buffer);
498template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
502 WorkspacePtr_ chunk_workspace,
503 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
504 const SlabCacheStats<Index_>& slab_stats,
510 my_non_target_dim(coordinator.get_non_target_dim(row)),
511 my_needs_value(opt.sparse_extract_value),
512 my_needs_index(opt.sparse_extract_index),
514 std::move(chunk_workspace),
520 opt.sparse_extract_value,
521 opt.sparse_extract_index
526 auto fetched = my_core.fetch_raw(i, my_row, 0, my_non_target_dim);
527 return process_sparse_slab(fetched, value_buffer, index_buffer, my_needs_value, my_needs_index);
532 Index_ my_non_target_dim;
533 bool my_needs_value, my_needs_index;
534 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
537template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
541 WorkspacePtr_ chunk_workspace,
542 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
543 const SlabCacheStats<Index_>& slab_stats,
551 my_block_start(block_start),
552 my_block_length(block_length),
553 my_needs_value(opt.sparse_extract_value),
554 my_needs_index(opt.sparse_extract_index),
556 std::move(chunk_workspace),
568 auto fetched = my_core.fetch_raw(i, my_row, my_block_start, my_block_length);
569 return process_sparse_slab(fetched, value_buffer, index_buffer, my_needs_value, my_needs_index);
574 Index_ my_block_start, my_block_length;
575 bool my_needs_value, my_needs_index;
576 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
579template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
583 WorkspacePtr_ chunk_workspace,
584 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
585 const SlabCacheStats<Index_>& slab_stats,
592 my_indices_ptr(std::move(indices_ptr)),
593 my_needs_value(opt.sparse_extract_value),
594 my_needs_index(opt.sparse_extract_index),
596 std::move(chunk_workspace),
601 my_indices_ptr->size(),
608 auto fetched = my_core.fetch_raw(i, my_row, *my_indices_ptr, my_tmp_indices);
609 return process_sparse_slab(fetched, value_buffer, index_buffer, my_needs_value, my_needs_index);
615 std::vector<Index_> my_tmp_indices;
616 bool my_needs_value, my_needs_index;
617 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
624template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
628 WorkspacePtr_ chunk_workspace,
629 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
630 const SlabCacheStats<Index_>& slab_stats,
636 my_non_target_dim(coordinator.get_non_target_dim(row)),
638 std::move(chunk_workspace),
649 const Value_* fetch(Index_ i, Value_* buffer) {
650 auto contents = my_core.fetch_raw(i, my_row, 0, my_non_target_dim);
652 Index_ num = contents.first->number[contents.second];
653 auto vptr = contents.first->values[contents.second];
654 auto iptr = contents.first->indices[contents.second];
656 std::fill_n(buffer, my_non_target_dim, 0);
657 for (Index_ x = 0; x < num; ++x, ++iptr, ++vptr) {
658 buffer[*iptr] = *vptr;
665 Index_ my_non_target_dim;
666 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
669template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
673 WorkspacePtr_ chunk_workspace,
674 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
675 const SlabCacheStats<Index_>& slab_stats,
683 my_block_start(block_start),
684 my_block_length(block_length),
686 std::move(chunk_workspace),
697 const Value_* fetch(Index_ i, Value_* buffer) {
698 auto contents = my_core.fetch_raw(i, my_row, my_block_start, my_block_length);
700 auto vptr = contents.first->values[contents.second];
701 auto iptr = contents.first->indices[contents.second];
702 auto num = contents.first->number[contents.second];
704 std::fill_n(buffer, my_block_length, 0);
705 for (Index_ x = 0; x < num; ++x, ++iptr, ++vptr) {
706 buffer[*iptr - my_block_start] = *vptr;
713 Index_ my_block_start, my_block_length;
714 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
717template<
bool solo_,
bool oracle_,
bool use_subset_,
typename Value_,
typename Index_,
typename ChunkValue_,
class WorkspacePtr_>
721 WorkspacePtr_ chunk_workspace,
722 const ChunkCoordinator<true, ChunkValue_, Index_>& coordinator,
723 const SlabCacheStats<Index_>& slab_stats,
730 my_indices_ptr(std::move(indices_ptr)),
732 std::move(chunk_workspace),
737 my_indices_ptr->size(),
742 const auto& indices = *my_indices_ptr;
743 if (!indices.empty()) {
744 my_remap_offset = indices.front();
745 Index_ alloc = indices.back() - my_remap_offset + 1;
746 tatami::resize_container_to_Index_size(my_remap, alloc);
748 for (auto i : indices) {
749 my_remap[i - my_remap_offset] = counter;
755 const Value_* fetch(Index_ i, Value_* buffer) {
756 auto contents = my_core.fetch_raw(i, my_row, *my_indices_ptr, my_tmp_indices);
758 auto vptr = contents.first->values[contents.second];
759 auto iptr = contents.first->indices[contents.second];
760 auto num = contents.first->number[contents.second];
762 auto nidx = my_indices_ptr->size();
763 std::fill_n(buffer, nidx, 0);
764 for (Index_ x = 0; x <num; ++x, ++iptr, ++vptr) {
765 buffer[my_remap[*iptr - my_remap_offset]] = *vptr;
773 Index_ my_remap_offset = 0;
774 std::vector<Index_> my_remap;
775 std::vector<Index_> my_tmp_indices;
776 SparseCore<solo_, oracle_, use_subset_, Value_, Index_, ChunkValue_, WorkspacePtr_> my_core;
799template<
typename Value_,
typename Index_,
typename ChunkValue_,
class Manager_ = CustomSparseChunkedMatrixManager<ChunkValue_, Index_> >
807 my_manager(std::move(manager)),
808 my_coordinator(my_manager->row_stats(), my_manager->column_stats()),
809 my_cache_size_in_bytes(opt.maximum_cache_size),
810 my_require_minimum_cache(opt.require_minimum_cache),
811 my_cache_subset(opt.cache_subset)
815 std::shared_ptr<Manager_> my_manager;
816 CustomChunkedMatrix_internal::ChunkCoordinator<true, ChunkValue_, Index_> my_coordinator;
817 std::size_t my_cache_size_in_bytes;
818 bool my_require_minimum_cache;
819 bool my_cache_subset;
822 Index_ nrow()
const {
823 return my_coordinator.get_nrow();
826 Index_ ncol()
const {
827 return my_coordinator.get_ncol();
830 bool prefer_rows()
const {
831 return my_manager->prefer_rows();
834 bool uses_oracle(
bool)
const {
838 double prefer_rows_proportion()
const {
839 return static_cast<double>(my_manager->prefer_rows());
842 bool is_sparse()
const {
846 double is_sparse_proportion()
const {
859 template<
bool,
typename,
typename>
class Interface_,
861 template<
bool,
bool,
bool,
typename,
typename,
typename,
class>
class Extractor_,
864 std::unique_ptr<Interface_<oracle_, Value_, Index_> > raw_internal(
bool row, Index_ non_target_length,
const tatami::Options& opt, Args_&& ... args)
const {
869 return SlabCacheStats<Index_>(
870 my_coordinator.get_chunk_nrow(),
872 my_coordinator.get_num_chunks_per_column(),
873 my_cache_size_in_bytes,
875 my_require_minimum_cache
879 return SlabCacheStats<Index_>(
880 my_coordinator.get_chunk_ncol(),
882 my_coordinator.get_num_chunks_per_row(),
883 my_cache_size_in_bytes,
885 my_require_minimum_cache
890 auto wrk = my_manager->new_workspace_exact();
891 if (stats.max_slabs_in_cache == 0) {
892 return std::make_unique<Extractor_<
true, oracle_,
false, Value_, Index_, ChunkValue_, I<
decltype(wrk)> > >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
893 }
else if constexpr(oracle_) {
894 if (my_cache_subset) {
895 return std::make_unique<Extractor_<
false,
true,
true, Value_, Index_, ChunkValue_, I<
decltype(wrk)> > >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
897 return std::make_unique<Extractor_<
false,
true,
false, Value_, Index_, ChunkValue_, I<
decltype(wrk)> > >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
900 return std::make_unique<Extractor_<
false,
false,
false, Value_, Index_, ChunkValue_, I<
decltype(wrk)> > >(std::move(wrk), my_coordinator, stats, row, std::forward<Args_>(args)...);
905 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
906 return raw_internal<tatami::DenseExtractor, false, CustomChunkedMatrix_internal::DensifiedFull>(row, my_coordinator.get_non_target_dim(row), opt,
false, opt);
909 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
910 return raw_internal<tatami::DenseExtractor, false, CustomChunkedMatrix_internal::DensifiedBlock>(row, block_length, opt,
false, block_start, block_length, opt);
914 auto num_indices = indices_ptr->size();
915 return raw_internal<tatami::DenseExtractor, false, CustomChunkedMatrix_internal::DensifiedIndex>(row, num_indices, opt,
false, std::move(indices_ptr), opt);
922 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
927 return raw_internal<tatami::DenseExtractor, true, CustomChunkedMatrix_internal::DensifiedFull>(row, my_coordinator.get_non_target_dim(row), opt, std::move(oracle), opt);
930 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
937 return raw_internal<tatami::DenseExtractor, true, CustomChunkedMatrix_internal::DensifiedBlock>(row, block_length, opt, std::move(oracle), block_start, block_length, opt);
940 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
946 auto num_indices = indices_ptr->size();
947 return raw_internal<tatami::DenseExtractor, true, CustomChunkedMatrix_internal::DensifiedIndex>(row, num_indices, opt, std::move(oracle), std::move(indices_ptr), opt);
954 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
955 return raw_internal<tatami::SparseExtractor, false, CustomChunkedMatrix_internal::SparseFull>(row, my_coordinator.get_non_target_dim(row), opt,
false, opt);
958 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
959 return raw_internal<tatami::SparseExtractor, false, CustomChunkedMatrix_internal::SparseBlock>(row, block_length, opt,
false, block_start, block_length, opt);
963 auto num_indices = indices_ptr->size();
964 return raw_internal<tatami::SparseExtractor, false, CustomChunkedMatrix_internal::SparseIndex>(row, num_indices, opt,
false, std::move(indices_ptr), opt);
971 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
976 return raw_internal<tatami::SparseExtractor, true, CustomChunkedMatrix_internal::SparseFull>(row, my_coordinator.get_non_target_dim(row), opt, std::move(oracle), opt);
979 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
986 return raw_internal<tatami::SparseExtractor, true, CustomChunkedMatrix_internal::SparseBlock>(row, block_length, opt, std::move(oracle), block_start, block_length, opt);
989 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
995 auto num_indices = indices_ptr->size();
996 return raw_internal<tatami::SparseExtractor, true, CustomChunkedMatrix_internal::SparseIndex>(row, num_indices, opt, std::move(oracle), std::move(indices_ptr), opt);
Create an LRU cache of slabs.
Create an oracle-aware cache for slabs.
Create an oracle-aware cache with subsets.
Factory for sparse slabs.
Manager of chunks for a CustomSparseChunkedMatrix.
Definition CustomSparseChunkedMatrix.hpp:281
std::unique_ptr< CustomSparseChunkedMatrixWorkspace< ChunkValue_, Index_ > > new_workspace_exact() const
Definition CustomSparseChunkedMatrix.hpp:308
virtual const ChunkDimensionStats< Index_ > & column_stats() const =0
virtual const ChunkDimensionStats< Index_ > & row_stats() const =0
virtual std::unique_ptr< CustomSparseChunkedMatrixWorkspace< ChunkValue_, Index_ > > new_workspace() const =0
virtual bool prefer_rows() const =0
Workspace for extracting data from a CustomSparseChunkedMatrixManager.
Definition CustomSparseChunkedMatrix.hpp:56
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, Index_ target_start, Index_ target_length, const std::vector< Index_ > &non_target_indices, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, const std::vector< Index_ > &target_indices, Index_ non_target_start, Index_ non_target_length, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, const std::vector< Index_ > &target_indices, const std::vector< Index_ > &non_target_indices, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
virtual void extract(Index_ chunk_row_id, Index_ chunk_column_id, bool row, Index_ target_start, Index_ target_length, Index_ non_target_start, Index_ non_target_length, const std::vector< ChunkValue_ * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift)=0
Matrix of custom sparse chunks.
Definition CustomSparseChunkedMatrix.hpp:800
CustomSparseChunkedMatrix(std::shared_ptr< Manager_ > manager, const CustomSparseChunkedMatrixOptions &opt)
Definition CustomSparseChunkedMatrix.hpp:806
Factory for sparse slabs.
Definition SparseSlabFactory.hpp:34
Slab create()
Definition SparseSlabFactory.hpp:162
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:11
std::shared_ptr< const std::vector< Index_ > > VectorPtr
typename std::conditional< oracle_, OracularSparseExtractor< Value_, Index_ >, MyopicSparseExtractor< Value_, Index_ > >::type SparseExtractor
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
bool sparse_extract_index
bool sparse_extract_value
Statistics for regular chunks along a dimension.
Definition ChunkDimensionStats.hpp:42
Options for data extraction from a CustomSparseChunkedMatrix.
Definition CustomSparseChunkedMatrix.hpp:28
std::size_t maximum_cache_size
Definition CustomSparseChunkedMatrix.hpp:34
bool cache_subset
Definition CustomSparseChunkedMatrix.hpp:47
bool require_minimum_cache
Definition CustomSparseChunkedMatrix.hpp:41
Statistics for slab caching.
Definition SlabCacheStats.hpp:26