1#ifndef TATAMI_TILEDB_SPARSE_MATRIX_HPP
2#define TATAMI_TILEDB_SPARSE_MATRIX_HPP
4#include "tatami_chunked/tatami_chunked.hpp"
5#include <tiledb/tiledb>
// Internal helpers for the sparse TileDB matrix. The typedefs below give short
// local names to types from ::tatami_tiledb::internal.
50namespace SparseMatrix_internal {
52typedef ::tatami_tiledb::internal::Components Components;
53typedef ::tatami_tiledb::internal::VariablyTypedDimension Dimension;
54typedef ::tatami_tiledb::internal::VariablyTypedVector CacheBuffer;
// NOTE(review): the two buffers below are accessed elsewhere in this file as
// `work.target_indices` / `work.non_target_indices`, alongside `work.values`;
// the enclosing struct header and the `values` declaration are not present in
// this listing.
58 CacheBuffer target_indices;
59 CacheBuffer non_target_indices;
// Runs one TileDB read over `subarray` and returns the element count that
// TileDB reports for the target dimension's buffer.
// NOTE(review): this listing has dropped several original lines (e.g. 66, 69,
// 72-75), so the declarations of `row`, `work` and `work_length`, and whatever
// surrounds the individual buffer bindings, are not visible here.
// Throws std::runtime_error if the submitted query does not finish as COMPLETE.
62inline size_t execute_query(
63 const Components& tdb_comp,
64 tiledb::Subarray& subarray,
65 const std::string& attribute,
67 const std::string& target_dimname,
68 const std::string& non_target_dimname,
70 size_t general_work_offset,
71 size_t target_index_work_offset,
76 tiledb::Query query(tdb_comp.ctx, tdb_comp.array);
77 query.set_subarray(subarray);
// Result layout follows `row`: row-major when true, column-major otherwise.
78 query.set_layout(row ? TILEDB_ROW_MAJOR : TILEDB_COL_MAJOR);
// Target-dimension coordinates are bound at their own offset, distinct from the
// offset shared by the values and the non-target coordinates.
80 work.target_indices.set_data_buffer(query, target_dimname, target_index_work_offset, work_length);
82 work.values.set_data_buffer(query, attribute, general_work_offset, work_length);
85 work.non_target_indices.set_data_buffer(query, non_target_dimname, general_work_offset, work_length);
// Any status other than COMPLETE is reported as a failure.
88 if (query.submit() != tiledb::Query::Status::COMPLETE) {
89 throw std::runtime_error(
"failed to read sparse data from TileDB");
// `.second` of the per-buffer entry for the target dimension (see the TileDB
// documentation of result_buffer_elements() for the meaning of each member).
92 return query.result_buffer_elements()[target_dimname].second;
// Cache settings passed to the non-oracular core's constructor.
// NOTE(review): original line 101 is absent from this listing; the constructor
// that takes this struct reads `cache_stats.chunk_length`, so a `chunk_length`
// member is presumably declared there.
99template<
typename Index_>
100struct MyopicCacheParameters {
// Copied into `my_slab_size`: sizes the target-index workspace and is the
// per-slab stride of the slab offsets.
102 size_t slab_size_in_elements;
// Passed to the LruSlabCache constructor and used as the multiplier for the
// total value/index workspace size.
103 size_t max_slabs_in_cache;
// Constructor and data members of the core that takes MyopicCacheParameters
// (presumably `MyopicCore`, the name used by the SparseCore alias; the class
// header itself is not in this listing, nor are the `needs_value` /
// `needs_index` parameters referenced by the initializer list).
106template<
typename Index_>
110 const Components& tdb_comp,
111 const std::string& attribute,
113 Index_ target_dim_extent,
114 const std::string& target_dimname,
115 const Dimension& tdb_target_dim,
116 const std::string& non_target_dimname,
117 const Dimension& tdb_non_target_dim,
118 tiledb_datatype_t tdb_type,
// Accepted but marked unused in this core.
119 [[maybe_unused]] Index_ non_target_length,
121 const MyopicCacheParameters<Index_>& cache_stats,
124 my_tdb_comp(tdb_comp),
125 my_attribute(attribute),
127 my_target_dim_extent(target_dim_extent),
128 my_tdb_target_dim(tdb_target_dim),
129 my_target_dimname(target_dimname),
130 my_tdb_non_target_dim(tdb_non_target_dim),
131 my_non_target_dimname(non_target_dimname),
132 my_target_chunk_length(cache_stats.chunk_length),
133 my_slab_size(cache_stats.slab_size_in_elements),
134 my_needs_value(needs_value),
135 my_needs_index(needs_index),
136 my_cache(cache_stats.max_slabs_in_cache)
// The target-index buffer only needs room for a single slab, whereas values
// and non-target indices get room for every slab the cache can hold.
139 my_work.target_indices.reset(my_tdb_target_dim.type(), my_slab_size);
141 size_t total_cache_size = my_slab_size * cache_stats.max_slabs_in_cache;
// Value / index storage is only allocated when the corresponding flag is set.
142 if (my_needs_value) {
143 my_work.values.reset(tdb_type, total_cache_size);
145 if (my_needs_index) {
146 my_work.non_target_indices.reset(my_tdb_non_target_dim.type(), total_cache_size);
// The members below declared as references bind to the constructor arguments,
// so the referenced objects must outlive this core.
151 const Components& my_tdb_comp;
152 const std::string& my_attribute;
155 Index_ my_target_dim_extent;
156 const Dimension& my_tdb_target_dim;
157 const std::string& my_target_dimname;
158 const Dimension& my_tdb_non_target_dim;
159 const std::string& my_non_target_dimname;
161 Index_ my_target_chunk_length;
// Scratch output of `compact()`; read as (coordinate, count) pairs when the
// index pointers are built.
166 std::vector<std::pair<Index_, Index_> > my_counts;
// NOTE(review): accessed as `contents.indptrs` / `info.indptrs`, so this
// presumably belongs to the `Slab` type whose header is not in this listing.
170 std::vector<size_t> indptrs;
// Next free slab offset; advanced by `my_slab_size` per new slab.
172 size_t my_offset = 0;
173 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
// Looks up (loading on demand through `my_cache.find`) the slab containing
// target element `i` and returns a pair: the workspace position where that
// element's entries start, and how many entries it has.
// `configure` is invoked as configure(subarray, rowdex) before the query runs.
// NOTE(review): several original lines are missing here (the lambda headers,
// the definition of `rowdex`, most execute_query arguments).
176 template<
class Configure_>
177 std::pair<size_t, size_t> fetch_raw(Index_ i, Configure_ configure) {
// Split `i` into the chunk id and the position inside that chunk.
178 Index_ chunk = i / my_target_chunk_length;
179 Index_ index = i % my_target_chunk_length;
181 const auto& info = my_cache.find(
// Assign the next free workspace offset and advance it by one slab.
185 output.offset = my_offset;
186 my_offset += my_slab_size;
// Callback that fills slab `contents` for chunk `id`.
189 [&](Index_ id, Slab& contents) ->
void {
190 Index_ chunk_start =
id * my_target_chunk_length;
// The last chunk may be shorter than the nominal chunk length.
191 Index_ chunk_length = std::min(my_target_dim_extent - chunk_start, my_target_chunk_length);
193 size_t num_nonzero = 0;
195 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
// The target dimension uses the other dimension index (1 - rowdex); the
// caller's `configure` receives `rowdex` itself.
197 my_tdb_target_dim.add_range(subarray, 1 - rowdex, chunk_start, chunk_length);
198 configure(subarray, rowdex);
199 num_nonzero = execute_query(
205 my_non_target_dimname,
215 auto& indptrs = contents.indptrs;
217 indptrs.resize(chunk_length + 1);
// Write each count one slot after its chunk-relative coordinate, then turn
// the counts into cumulative offsets.
220 my_work.target_indices.compact(0, num_nonzero, my_tdb_target_dim, my_counts);
221 for (
const auto& cnts : my_counts) {
222 indptrs[cnts.first - chunk_start + 1] = cnts.second;
224 for (Index_ i = 1; i <= chunk_length; ++i) {
225 indptrs[i] += indptrs[i - 1];
// Start position = slab offset + element's first pointer; length = pointer
// difference.
231 auto start = info.indptrs[index];
232 return std::make_pair(info.offset + start, info.indptrs[index + 1] - start);
// Fetches target element `i`, restricting the non-target dimension to the
// single range given by (block_start, block_length). Returns what fetch_raw
// returns.
236 std::pair<size_t, size_t> fetch_block(Index_ i, Index_ block_start, Index_ block_length) {
237 return fetch_raw(i, [&](tiledb::Subarray& subarray,
int rowdex) {
238 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, block_length);
// Fetches target element `i`, restricting the non-target dimension according
// to `indices`. Returns what fetch_raw returns.
// NOTE(review): the line that produces `s` and `l` is missing from this
// listing; presumably they are the start and length of each consecutive run
// in `indices` -- confirm against the full source.
242 std::pair<size_t, size_t> fetch_indices(Index_ i,
const std::vector<Index_>& indices) {
243 return fetch_raw(i, [&](tiledb::Subarray& subarray,
int rowdex) {
245 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Read-only accessors for the core's state.
// NOTE(review): the body of get_workspace() is not present in this listing.
251 const Workspace& get_workspace()
const {
// Whether values were requested at construction.
255 bool get_needs_value()
const {
256 return my_needs_value;
// Whether indices were requested at construction.
259 bool get_needs_index()
const {
260 return my_needs_index;
// The non-target dimension descriptor bound at construction.
263 const Dimension& get_tdb_non_target_dim()
const {
264 return my_tdb_non_target_dim;
// Cache settings passed to the oracle-aware core's constructor.
// NOTE(review): original line 278 is absent from this listing; the constructor
// that takes this struct reads `cache_stats.chunk_length`, so a `chunk_length`
// member is presumably declared there.
276template<
typename Index_>
277struct OracularCacheParameters {
// Used to size all three workspace buffers and passed to the slab cache.
279 size_t max_cache_size_in_elements;
// Constructor and data members of the core that takes OracularCacheParameters
// (presumably `OracularCore`, the name used by the SparseCore alias; the class
// header is not in this listing, nor are the `oracle`, `needs_value` and
// `needs_index` parameters referenced by the initializer list).
282template<
typename Index_>
286 const Components& tdb_comp,
287 const std::string& attribute,
289 Index_ target_dim_extent,
290 const std::string& target_dimname,
291 const Dimension& tdb_target_dim,
292 const std::string& non_target_dimname,
293 const Dimension& tdb_non_target_dim,
294 tiledb_datatype_t tdb_type,
295 Index_ non_target_length,
297 const OracularCacheParameters<Index_>& cache_stats,
300 my_tdb_comp(tdb_comp),
301 my_attribute(attribute),
303 my_target_dim_extent(target_dim_extent),
304 my_tdb_target_dim(tdb_target_dim),
305 my_target_dimname(target_dimname),
306 my_tdb_non_target_dim(tdb_non_target_dim),
307 my_non_target_dimname(non_target_dimname),
308 my_target_chunk_length(cache_stats.chunk_length),
// Reads `my_target_chunk_length`, which is declared before `my_max_slab_size`
// below and is therefore already initialized. The cast to size_t happens
// before the multiplication.
309 my_max_slab_size(static_cast<size_t>(non_target_length) * my_target_chunk_length),
310 my_needs_value(needs_value),
311 my_needs_index(needs_index),
312 my_cache(std::move(oracle), cache_stats.max_cache_size_in_elements)
// All three buffers are sized to the full cache capacity; values and
// non-target indices are only allocated when requested.
314 my_work.target_indices.reset(my_tdb_target_dim.type(), cache_stats.max_cache_size_in_elements);
315 if (my_needs_value) {
316 my_work.values.reset(tdb_type, cache_stats.max_cache_size_in_elements);
318 if (my_needs_index) {
319 my_work.non_target_indices.reset(my_tdb_non_target_dim.type(), cache_stats.max_cache_size_in_elements);
// The members below declared as references bind to the constructor arguments,
// so the referenced objects must outlive this core.
324 const Components& my_tdb_comp;
325 const std::string& my_attribute;
328 Index_ my_target_dim_extent;
329 const Dimension& my_tdb_target_dim;
330 const std::string& my_target_dimname;
331 const Dimension& my_tdb_non_target_dim;
332 const std::string& my_non_target_dimname;
334 Index_ my_target_chunk_length;
335 size_t my_max_slab_size;
// Scratch output of `compact()`; read as (coordinate, count) pairs when the
// index pointers are built.
339 std::vector<std::pair<Index_, Index_> > my_counts;
// NOTE(review): accessed as `slab.indptrs`, so this presumably belongs to the
// `Slab` type whose header is not in this listing.
343 std::vector<size_t> indptrs;
345 tatami_chunked::OracularVariableSlabCache<Index_, Index_, Slab, size_t> my_cache;
// Orders `indices` in place by the key that `field` extracts from each pair.
// The is_sorted() check skips the sort call when the input is already ordered.
348 template<
class Function_>
349 static void sort_by_field(std::vector<std::pair<Index_, size_t> >& indices, Function_ field) {
// Comparator: strict less-than on the extracted keys.
350 auto comp = [&field](
const std::pair<Index_, size_t>& l,
const std::pair<Index_, size_t>& r) ->
bool {
351 return field(l) < field(r);
353 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
354 std::sort(indices.begin(), indices.end(), comp);
// Advances the oracle-driven cache by one prediction and returns a pair: the
// workspace position where that target element's entries start, and how many
// entries it has. The parameter `i` is unused; the element comes from
// `my_cache.next`.
// NOTE(review): several original lines are missing from this listing (the
// definition of `rowdex`, most execute_query arguments, the tail of some
// loops), so comments describe only what is visible.
358 template<
class Configure_>
359 std::pair<size_t, size_t> fetch_raw([[maybe_unused]] Index_ i, Configure_ configure) {
360 auto info = my_cache.next(
// Maps a target index to (chunk id, position within the chunk).
361 [&](Index_ current) -> std::pair<Index_, Index_> {
362 return std::pair<Index_, Index_>(current / my_target_chunk_length, current % my_target_chunk_length);
// Reports the same size (my_max_slab_size) for every chunk id.
364 [&](Index_) ->
size_t {
365 return my_max_slab_size;
// Reports the last index pointer of an existing slab; since the pointers are
// cumulative, this is the number of entries the slab holds.
367 [&](Index_,
const Slab& slab) ->
size_t {
368 return slab.indptrs.back();
// Bulk callback: receives the slabs to fill, the slabs to keep, and the slab
// storage that both lists index into via `.second`.
373 [&](std::vector<std::pair<Index_, size_t> >& to_populate, std::vector<std::pair<Index_, size_t> >& to_reuse, std::vector<Slab>& all_slabs) {
// Step 1: visit the kept slabs in order of their current offset and shift
// each one down so that they are packed contiguously from offset 0.
376 sort_by_field(to_reuse, [&](
const std::pair<Index_, size_t>& x) ->
size_t {
return all_slabs[x.second].offset; });
377 size_t running_offset = 0;
378 for (
auto& x : to_reuse) {
379 auto& reused_slab = all_slabs[x.second];
380 auto& cur_offset = reused_slab.offset;
381 auto num_nonzero = reused_slab.indptrs.back();
// Only move data when the slab is not already where it should be.
382 if (cur_offset != running_offset) {
383 if (my_needs_value) {
384 my_work.values.shift(cur_offset, num_nonzero, running_offset);
386 if (my_needs_index) {
387 my_work.non_target_indices.shift(cur_offset, num_nonzero, running_offset);
389 cur_offset = running_offset;
391 running_offset += num_nonzero;
// Step 2: order the slabs to load by chunk id.
399 sort_by_field(to_populate, [](
const std::pair<Index_, size_t>& x) -> Index_ {
return x.first; });
401 size_t num_nonzero = 0;
403 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
405 configure(subarray, rowdex);
// Start a run of target coordinates at the first chunk to load; the last
// chunk may be shorter than the nominal chunk length.
407 Index_ run_chunk_id = to_populate.front().first;
408 Index_ run_chunk_start = run_chunk_id * my_target_chunk_length;
409 Index_ run_length = std::min(my_target_dim_extent - run_chunk_start, my_target_chunk_length);
411 int dimdex = 1 - rowdex;
412 for (
size_t ci = 1, cend = to_populate.size(); ci < cend; ++ci) {
413 Index_ current_chunk_id = to_populate[ci].first;
414 Index_ current_chunk_start = current_chunk_id * my_target_chunk_length;
// A gap in chunk ids closes the current run: it is added as one range and a
// new run starts at the current chunk.
// NOTE(review): in the visible lines `run_chunk_id` is only reassigned inside
// this branch; confirm that the omitted lines (420-422, 424) keep this gap
// test relative to the previous chunk rather than to the start of the run.
416 if (current_chunk_id - run_chunk_id > 1) {
417 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
418 run_chunk_id = current_chunk_id;
419 run_chunk_start = current_chunk_start;
423 run_length += std::min(my_target_dim_extent - current_chunk_start, my_target_chunk_length);
// Add the final run, then read everything in one query.
426 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
427 num_nonzero = execute_query(
433 my_non_target_dimname,
437 to_populate.size() * my_max_slab_size,
// Step 3: collapse the target coordinates into (coordinate, count) pairs and
// hand them out to the slabs in chunk order.
443 my_work.target_indices.compact(running_offset, num_nonzero, my_tdb_target_dim, my_counts);
445 auto cIt = my_counts.begin(), cEnd = my_counts.end();
446 for (
auto& si : to_populate) {
447 auto& populate_slab = all_slabs[si.second];
448 populate_slab.offset = running_offset;
450 Index_ chunk_start = si.first * my_target_chunk_length;
451 Index_ chunk_length = std::min(my_target_dim_extent - chunk_start, my_target_chunk_length);
452 Index_ chunk_end = chunk_start + chunk_length;
// Reset the pointers to chunk_length + 1 zeros before filling them in.
454 auto& slab_indptrs = populate_slab.indptrs;
455 slab_indptrs.clear();
456 slab_indptrs.resize(chunk_length + 1);
// Take the counts whose coordinate lies before this chunk's end, writing
// each one slot after its chunk-relative coordinate.
// NOTE(review): the statement advancing `cIt` is not in this listing.
458 while (cIt != cEnd && cIt->first < chunk_end) {
459 slab_indptrs[cIt->first - chunk_start + 1] = cIt->second;
// Cumulative sum turns counts into offsets; the last value is the slab's
// total, which moves the running offset to the next slab's position.
463 for (Index_ i = 1; i <= chunk_length; ++i) {
464 slab_indptrs[i] += slab_indptrs[i - 1];
466 running_offset += slab_indptrs.back();
// `info.first` is the slab and `info.second` the position within it.
471 const auto& indptrs = info.first->indptrs;
472 auto start = indptrs[info.second];
473 return std::make_pair(info.first->offset + start, indptrs[info.second + 1] - start);
// Fetches via fetch_raw, restricting the non-target dimension to the single
// range given by (block_start, block_length). Returns what fetch_raw returns.
477 std::pair<size_t, size_t> fetch_block(Index_ i, Index_ block_start, Index_ block_length) {
478 return fetch_raw(i, [&](tiledb::Subarray& subarray,
int rowdex) {
479 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, block_length);
// Fetches via fetch_raw, restricting the non-target dimension according to
// `indices`. Returns what fetch_raw returns.
// NOTE(review): the line that produces `s` and `l` is missing from this
// listing; presumably they are the start and length of each consecutive run
// in `indices` -- confirm against the full source.
483 std::pair<size_t, size_t> fetch_indices(Index_ i,
const std::vector<Index_>& indices) {
484 return fetch_raw(i, [&](tiledb::Subarray& subarray,
int rowdex) {
486 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Read-only accessors for the core's state.
// NOTE(review): the body of get_workspace() is not present in this listing.
492 const Workspace& get_workspace()
const {
// Whether values were requested at construction.
496 bool get_needs_value()
const {
497 return my_needs_value;
// Whether indices were requested at construction.
500 bool get_needs_index()
const {
501 return my_needs_index;
// The non-target dimension descriptor bound at construction.
504 const Dimension& get_tdb_non_target_dim()
const {
505 return my_tdb_non_target_dim;
// Compile-time selection of the core type: OracularCore when `oracle_` is
// true, MyopicCore otherwise.
509template<
bool oracle_,
typename Index_>
510using SparseCore =
typename std::conditional<oracle_, OracularCore<Index_>, MyopicCore<Index_> >::type;
// Compile-time selection of the matching cache-parameter struct.
512template<
bool oracle_,
typename Index_>
513using CacheParameters =
typename std::conditional<oracle_, OracularCacheParameters<Index_>, MyopicCacheParameters<Index_> >::type;
// Copies a run of `work_length` workspace entries starting at `work_start`
// into the caller's buffers and records the buffers in `output`.
// NOTE(review): the function header, the remaining parameters, and the
// conditions guarding the two copies are not in this listing; the name
// `fill_sparse_range` is taken from the call sites further down, which pass
// needs-value / needs-index flags as the last two arguments.
519template<
typename Value_,
typename Index_>
521 const Workspace& work,
524 const Dimension& non_target_dim,
531 output.
number = work_length;
// Values are copied into `vbuffer`, which then becomes the output pointer.
533 work.values.copy(work_start, work_length, vbuffer);
534 output.
value = vbuffer;
// Non-target indices are copied (with `non_target_dim` passed to the copy)
// into `ibuffer`, which then becomes the output pointer.
537 work.non_target_indices.copy(work_start, work_length, non_target_dim, ibuffer);
538 output.
index = ibuffer;
// Sparse extractor over the whole non-target dimension (presumably
// `SparseFull`, the name used by the factory methods; the class header and
// the fetch() signature are not in this listing).
543template<
bool oracle_,
typename Value_,
typename Index_>
547 const Components& tdb_comp,
548 const std::string& attribute,
550 Index_ target_dim_extent,
551 const std::string& target_dimname,
552 const Dimension& tdb_target_dim,
553 const std::string& non_target_dimname,
554 const Dimension& tdb_non_target_dim,
555 tiledb_datatype_t tdb_type,
557 Index_ non_target_dim,
558 const CacheParameters<oracle_, Index_>& cache_parameters,
577 my_non_target_dim(non_target_dim)
// Request the block [0, my_non_target_dim) and hand the resulting workspace
// run to fill_sparse_range.
581 auto info = my_core.fetch_block(i, 0, my_non_target_dim);
582 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
586 SparseCore<oracle_, Index_> my_core;
587 Index_ my_non_target_dim;
// Sparse extractor over a contiguous block of the non-target dimension
// (presumably `SparseBlock`, the name used by the factory methods; the class
// header, the block parameters and the fetch() signature are not in this
// listing).
590template<
bool oracle_,
typename Value_,
typename Index_>
594 const Components& tdb_comp,
595 const std::string& attribute,
597 Index_ target_dim_extent,
598 const std::string& target_dimname,
599 const Dimension& tdb_target_dim,
600 const std::string& non_target_dimname,
601 const Dimension& tdb_non_target_dim,
602 tiledb_datatype_t tdb_type,
606 const CacheParameters<oracle_, Index_>& cache_parameters,
625 my_block_start(block_start),
626 my_block_length(block_length)
// Request the stored block and hand the resulting workspace run to
// fill_sparse_range.
630 auto info = my_core.fetch_block(i, my_block_start, my_block_length);
631 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
635 SparseCore<oracle_, Index_> my_core;
636 Index_ my_block_start, my_block_length;
// Sparse extractor over an explicit set of non-target indices (presumably
// `SparseIndex`, the name used by the factory methods; the class header, the
// `indices_ptr` parameter, the fetch() signature and the `my_indices_ptr`
// declaration are not in this listing).
639template<
bool oracle_,
typename Value_,
typename Index_>
643 const Components& tdb_comp,
644 const std::string& attribute,
646 Index_ target_dim_extent,
647 const std::string& target_dimname,
648 const Dimension& tdb_target_dim,
649 const std::string& non_target_dimname,
650 const Dimension& tdb_non_target_dim,
651 tiledb_datatype_t tdb_type,
654 const CacheParameters<oracle_, Index_>& cache_parameters,
// The extractor keeps the index pointer it is given.
673 my_indices_ptr(std::move(indices_ptr))
// Request the stored indices and hand the resulting workspace run to
// fill_sparse_range.
677 auto info = my_core.fetch_indices(i, *my_indices_ptr);
678 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
682 SparseCore<oracle_, Index_> my_core;
// Dense extractor over the whole non-target dimension (presumably
// `DenseFull`, the name used by the factory methods; the class header and the
// arguments forwarded to `my_core` are mostly absent from this listing).
690template<
bool oracle_,
typename Value_,
typename Index_>
694 const Components& tdb_comp,
695 const std::string& attribute,
697 Index_ target_dim_extent,
698 const std::string& target_dimname,
699 const Dimension& tdb_target_dim,
700 const std::string& non_target_dimname,
701 const Dimension& tdb_non_target_dim,
702 tiledb_datatype_t tdb_type,
704 Index_ non_target_dim_extent,
705 const CacheParameters<oracle_, Index_>& cache_parameters,
// Both flags are accepted but marked unused; fetch() below always reads both
// the values and the indices from the workspace.
706 [[maybe_unused]]
bool needs_value,
707 [[maybe_unused]]
bool needs_index) :
718 non_target_dim_extent,
// The holding vectors are sized from `my_non_target_dim_extent`, which is
// declared before them below and is therefore already initialized.
724 my_non_target_dim_extent(non_target_dim_extent),
725 my_holding_value(my_non_target_dim_extent),
726 my_holding_index(my_non_target_dim_extent)
// Writes the dense contents of target element `i` into `buffer`
// (my_non_target_dim_extent entries).
// NOTE(review): the return statement is not in this listing.
729 const Value_* fetch(Index_ i, Value_* buffer) {
730 auto info = my_core.fetch_block(i, 0, my_non_target_dim_extent);
731 const auto& work = my_core.get_workspace();
// Stage the sparse run in the holding vectors.
732 work.values.copy(info.first, info.second, my_holding_value.data());
733 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
// Zero the output, then scatter each value to the position given by its index.
734 std::fill_n(buffer, my_non_target_dim_extent, 0);
// Note: this loop variable `i` shadows the function parameter `i`.
735 for (
size_t i = 0; i < info.second; ++i) {
736 buffer[my_holding_index[i]] = my_holding_value[i];
742 SparseCore<oracle_, Index_> my_core;
743 Index_ my_non_target_dim_extent;
744 std::vector<Value_> my_holding_value;
745 std::vector<Index_> my_holding_index;
// Dense extractor over a contiguous block of the non-target dimension
// (presumably `DenseBlock`, the name used by the factory methods; the class
// header, the block parameters and the arguments forwarded to `my_core` are
// absent from this listing).
748template<
bool oracle_,
typename Value_,
typename Index_>
752 const Components& tdb_comp,
753 const std::string& attribute,
755 Index_ target_dim_extent,
756 const std::string& target_dimname,
757 const Dimension& tdb_target_dim,
758 const std::string& non_target_dimname,
759 const Dimension& tdb_non_target_dim,
760 tiledb_datatype_t tdb_type,
764 const CacheParameters<oracle_, Index_>& cache_parameters,
// Both flags are accepted but marked unused; fetch() below always reads both
// the values and the indices from the workspace.
765 [[maybe_unused]]
bool needs_value,
766 [[maybe_unused]]
bool needs_index) :
783 my_block_start(block_start),
784 my_block_length(block_length),
// Holding vectors are sized from the `block_length` constructor argument.
785 my_holding_value(block_length),
786 my_holding_index(block_length)
// Writes the dense contents of target element `i`, limited to the block, into
// `buffer` (my_block_length entries).
// NOTE(review): the return statement is not in this listing.
789 const Value_* fetch(Index_ i, Value_* buffer) {
790 auto info = my_core.fetch_block(i, my_block_start, my_block_length);
791 const auto& work = my_core.get_workspace();
// Stage the sparse run in the holding vectors.
792 work.values.copy(info.first, info.second, my_holding_value.data());
793 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
// Zero the output, then scatter each value to its block-relative position
// (index minus the block start).
794 std::fill_n(buffer, my_block_length, 0);
// Note: this loop variable `i` shadows the function parameter `i`.
795 for (
size_t i = 0; i < info.second; ++i) {
796 buffer[my_holding_index[i] - my_block_start] = my_holding_value[i];
802 SparseCore<oracle_, Index_> my_core;
803 Index_ my_block_start, my_block_length;
804 std::vector<Value_> my_holding_value;
805 std::vector<Index_> my_holding_index;
// Dense extractor over an explicit set of non-target indices (presumably
// `DenseIndex`, the name used by the factory methods; the class header, the
// `indices_ptr` parameter, the arguments forwarded to `my_core` and the
// `my_indices_ptr` declaration are absent from this listing).
808template<
bool oracle_,
typename Value_,
typename Index_>
812 const Components& tdb_comp,
813 const std::string& attribute,
815 Index_ target_dim_extent,
816 const std::string& target_dimname,
817 const Dimension& tdb_target_dim,
818 const std::string& non_target_dimname,
819 const Dimension& tdb_non_target_dim,
820 tiledb_datatype_t tdb_type,
823 const CacheParameters<oracle_, Index_>& cache_parameters,
// Both flags are accepted but marked unused; fetch() below always reads both
// the values and the indices from the workspace.
824 [[maybe_unused]]
bool needs_value,
825 [[maybe_unused]]
bool needs_index) :
// The holding vectors are sized through the member `my_indices_ptr`, because
// the `indices_ptr` argument has been moved from.
// NOTE(review): this relies on `my_indices_ptr` being declared before the
// holding vectors; its declaration is not in this listing -- confirm.
842 my_indices_ptr(std::move(indices_ptr)),
843 my_holding_value(my_indices_ptr->size()),
844 my_holding_index(my_indices_ptr->size())
// Build a lookup from (index - first index) to the position of that index in
// `indices`. Sizing by back() - front() + 1 assumes the first element is the
// smallest and the last the largest (i.e. sorted input -- TODO confirm).
846 const auto& indices = *my_indices_ptr;
847 if (!indices.empty()) {
848 auto idx_start = indices.front();
849 my_remapping.resize(indices.back() - idx_start + 1);
850 for (
size_t j = 0, end = indices.size(); j < end; ++j) {
851 my_remapping[indices[j] - idx_start] = j;
// Writes the dense contents of target element `i`, limited to the requested
// indices, into `buffer` (indices.size() entries).
// NOTE(review): the return statement is not in this listing.
856 const Value_* fetch(Index_ i, Value_* buffer) {
857 const auto& indices = *my_indices_ptr;
// With no indices requested, the core is not queried.
859 if (!indices.empty()) {
860 auto info = my_core.fetch_indices(i, indices);
861 const auto& work = my_core.get_workspace();
// Stage the sparse run in the holding vectors.
862 work.values.copy(info.first, info.second, my_holding_value.data());
863 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
864 auto idx_start = indices.front();
// Zero the output, then scatter each value to the position that the remapping
// table assigns to its index.
865 std::fill_n(buffer, indices.size(), 0);
// Note: this loop variable `i` shadows the function parameter `i`.
866 for (
size_t i = 0; i < info.second; ++i) {
867 buffer[my_remapping[my_holding_index[i] - idx_start]] = my_holding_value[i];
875 SparseCore<oracle_, Index_> my_core;
877 std::vector<Index_> my_remapping;
878 std::vector<Value_> my_holding_value;
879 std::vector<Index_> my_holding_index;
// Start of the matrix class template and the bodies of two of its
// constructors. NOTE(review): the class header and constructor signatures
// are not in this listing.
901template<
typename Value_,
typename Index_>
// Forwards the caller's context to initialize().
911 initialize(uri, std::move(ctx), options);
// No context supplied: `false` is passed in the context slot, which makes
// initialize() take its non-Context branch.
920 initialize(uri,
false, options);
// Opens the TileDB array at `uri`, validates its schema and records the
// per-dimension metadata used by the extractors.
// Throws std::runtime_error if the array is not sparse, lacks the requested
// attribute, or does not have exactly two dimensions.
// NOTE(review): lines copying values out of `options` and the statements
// surrounding the Components allocation are not in this listing.
930 template<
class PossibleContext_>
931 void initialize(
const std::string& uri, PossibleContext_ ctx,
const SparseMatrixOptions& options) {
// Use the supplied context only when `ctx` really is a tiledb::Context;
// otherwise Components is constructed from the URI alone.
937 if constexpr(std::is_same<PossibleContext_, tiledb::Context>::value) {
938 return new SparseMatrix_internal::Components(std::move(ctx), uri);
940 return new SparseMatrix_internal::Components(uri);
// Custom deleter for the Components pointer (body not in this listing).
943 [](SparseMatrix_internal::Components* ptr) {
951 auto schema = my_tdb_comp->array.schema();
952 if (schema.array_type() != TILEDB_SPARSE) {
953 throw std::runtime_error(
"TileDB array should be sparse");
// The cell order is consulted later when choosing the oracular chunk length.
955 my_cell_order = schema.cell_order();
960 if (!schema.has_attribute(my_attribute)) {
961 throw std::runtime_error(
"no attribute '" + my_attribute +
"' is present in the TileDB array");
963 auto attr = schema.attribute(my_attribute);
964 my_tdb_type = attr.type();
966 tiledb::Domain domain = schema.domain();
967 if (domain.ndim() != 2) {
968 throw std::runtime_error(
"TileDB array should have exactly two dimensions");
// Dimension 0 supplies the row statistics (see nrow_internal()).
971 tiledb::Dimension first_dim = domain.dimension(0);
972 my_first_dimname = first_dim.name();
973 my_tdb_first_dim.reset(first_dim);
974 Index_ first_extent = my_tdb_first_dim.extent<Index_>();
975 Index_ first_tile = my_tdb_first_dim.tile<Index_>();
976 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(first_extent, first_tile);
// Dimension 1 supplies the column statistics (see ncol_internal()).
978 tiledb::Dimension second_dim = domain.dimension(1);
979 my_second_dimname = second_dim.name();
980 my_tdb_second_dim.reset(second_dim);
981 Index_ second_extent = my_tdb_second_dim.extent<Index_>();
982 Index_ second_tile = my_tdb_second_dim.tile<Index_>();
983 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(second_extent, second_tile);
// Ceiling division: number of tiles spanning the second dimension and the
// first dimension respectively. The first dimension is preferred when it
// crosses no more tiles than the second.
986 auto tiles_per_firstdim = (second_extent / second_tile) + (second_extent % second_tile > 0);
987 auto tiles_per_seconddim = (first_extent / first_tile) + (first_extent % first_tile > 0);
988 my_prefer_firstdim = tiles_per_firstdim <= tiles_per_seconddim;
// State populated by initialize().
// Shared handle to the opened array components; dereferenced by populate()
// when constructing extractors.
993 std::shared_ptr<SparseMatrix_internal::Components> my_tdb_comp;
// Cell order and attribute datatype read from the array schema.
994 tiledb_layout_t my_cell_order;
995 tiledb_datatype_t my_tdb_type;
// Name of the attribute to read, plus the cache settings consulted by
// populate() and uses_oracle().
997 std::string my_attribute;
998 size_t my_cache_size_in_bytes;
999 bool my_require_minimum_cache;
// Per-dimension name, descriptor and chunking statistics; "first" is TileDB
// dimension 0 and "second" is dimension 1.
1001 std::string my_first_dimname, my_second_dimname;
1002 SparseMatrix_internal::Dimension my_tdb_first_dim, my_tdb_second_dim;
1003 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
// Result of the tile-count comparison made in initialize().
1005 bool my_prefer_firstdim;
// Dimension and access-preference queries.
// Extent of the first TileDB dimension.
1008 Index_ nrow_internal()
const {
1009 return my_firstdim_stats.dimension_extent;
// Extent of the second TileDB dimension.
1012 Index_ ncol_internal()
const {
1013 return my_seconddim_stats.dimension_extent;
// nrow() / ncol() forward to the internal accessors above.
1017 Index_ nrow()
const {
1018 return nrow_internal();
1021 Index_ ncol()
const {
1022 return ncol_internal();
// NOTE(review): the bodies of is_sparse() and is_sparse_proportion() are not
// in this listing.
1025 bool is_sparse()
const {
1029 double is_sparse_proportion()
const {
// Row access is preferred when initialize() decided the first dimension is
// preferred; the proportion is that flag converted to 0.0 or 1.0.
1033 bool prefer_rows()
const {
1034 return my_prefer_firstdim;
1037 double prefer_rows_proportion()
const {
1038 return static_cast<double>(my_prefer_firstdim);
// The argument is ignored: an oracle is reported as useful whenever the
// configured cache size is non-zero.
1041 bool uses_oracle(
bool)
const {
1044 return my_cache_size_in_bytes > 0;
// Shared factory behind every dense()/sparse() overload: derives the cache
// parameters for the requested access pattern and constructs the matching
// Extractor_.
// NOTE(review): the first template line (`oracle_`), several parameters
// (`row`, the oracle, the options, `args`), the conditions guarding the
// nonzero_size additions and most constructor arguments are not in this
// listing.
1050 template<
typename,
typename>
class Interface_,
1051 template<
bool,
typename,
typename>
class Extractor_,
1054 std::unique_ptr<Interface_<Value_, Index_> > populate(
1056 Index_ non_target_length,
// The "target" dimension is the one being iterated: the first dimension when
// `row` is true, otherwise the second.
1061 const auto& target_dim_stats = (row ? my_firstdim_stats : my_seconddim_stats);
1062 const auto& target_dimname = (row ? my_first_dimname : my_second_dimname);
1063 const auto& non_target_dimname = (row ? my_second_dimname : my_first_dimname);
1064 const auto& tdb_target_dim = (row ? my_tdb_first_dim : my_tdb_second_dim);
1065 const auto& tdb_non_target_dim = (row ? my_tdb_second_dim : my_tdb_first_dim);
// Accumulates the byte sizes of the attribute type and of the non-target
// dimension type.
1067 size_t nonzero_size = 0;
1069 nonzero_size += ::tatami_tiledb::internal::determine_type_size(my_tdb_type);
1072 nonzero_size += ::tatami_tiledb::internal::determine_type_size(tdb_non_target_dim.type());
1075 if constexpr(oracle_) {
// In the oracular branch the target dimension's type size is added as well.
1079 nonzero_size += ::tatami_tiledb::internal::determine_type_size(tdb_target_dim.type());
1081 SparseMatrix_internal::OracularCacheParameters<Index_> cache_params;
// Convert the byte budget into a number of elements.
1082 cache_params.max_cache_size_in_elements = my_cache_size_in_bytes / nonzero_size;
// Chunk length is 1 when the requested orientation agrees with the array's
// cell order, otherwise the tile-based chunk length.
1095 cache_params.chunk_length = (row == (my_cell_order == TILEDB_ROW_MAJOR) ? 1 : target_dim_stats.chunk_length);
// Largest possible slab: every non-target position filled for a full chunk.
1103 size_t max_slab_size =
static_cast<size_t>(non_target_length) * cache_params.chunk_length;
// Either raise the budget to hold at least one full slab, or, if the budget
// is too small and no minimum is required, fall back to chunks of length 1
// with room for `non_target_length` elements.
1104 if (my_require_minimum_cache) {
1105 cache_params.max_cache_size_in_elements = std::max(cache_params.max_cache_size_in_elements, max_slab_size);
1106 }
else if (cache_params.max_cache_size_in_elements < max_slab_size) {
1107 cache_params.max_cache_size_in_elements = non_target_length;
1108 cache_params.chunk_length = 1;
1111 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
1115 target_dim_stats.dimension_extent,
1122 std::forward<Args_>(args)...,
// Non-oracular path: slab statistics are computed by tatami_chunked.
1129 tatami_chunked::SlabCacheStats raw_params(
1130 target_dim_stats.chunk_length,
1132 target_dim_stats.num_chunks,
1133 my_cache_size_in_bytes,
1135 my_require_minimum_cache
1143 SparseMatrix_internal::MyopicCacheParameters<Index_> cache_params;
// Use the tile-based slabs when at least one fits in the cache.
1144 if (raw_params.max_slabs_in_cache > 0) {
1145 cache_params.chunk_length = target_dim_stats.chunk_length;
1146 cache_params.slab_size_in_elements = raw_params.slab_size_in_elements;
1147 cache_params.max_slabs_in_cache = raw_params.max_slabs_in_cache;
// Fallback values: one slab, chunk length 1, `non_target_length` elements
// (presumably the `else` branch -- its opening line is not in this listing).
1149 cache_params.chunk_length = 1;
1150 cache_params.slab_size_in_elements = non_target_length;
1151 cache_params.max_slabs_in_cache = 1;
1154 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
1158 target_dim_stats.dimension_extent,
1165 std::forward<Args_>(args)...,
// Myopic dense extractors. Each forwards to populate<false, ...>, passing
// `false` in the oracle slot and wrapping the options with
// set_extract_all(opt).
// Full extent: the non-target length is ncol when iterating over rows,
// otherwise nrow.
1185 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
1186 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1187 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseFull>(row, full_non_target,
false, set_extract_all(opt), full_non_target);
// Contiguous block of the non-target dimension.
1190 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1191 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseBlock>(row, block_length,
false, set_extract_all(opt), block_start, block_length);
// Indexed subset (signature not in this listing). The size is read before
// the pointer is moved into populate().
1195 auto nidx = indices_ptr->size();
1196 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseIndex>(row, nidx,
false, set_extract_all(opt), std::move(indices_ptr));
// Myopic sparse extractors. Each forwards to populate<false, ...>, passing
// `false` in the oracle slot and the caller's options unchanged.
// Full extent: the non-target length is ncol when iterating over rows,
// otherwise nrow.
1203 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
1204 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1205 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseFull>(row, full_non_target,
false, opt, full_non_target);
// Contiguous block of the non-target dimension.
1208 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1209 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseBlock>(row, block_length,
false, opt, block_start, block_length);
// Indexed subset (signature not in this listing). The size is read before
// the pointer is moved into populate().
1213 auto nidx = indices_ptr->size();
1214 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseIndex>(row, nidx,
false, opt, std::move(indices_ptr));
// Oracular dense extractors. Each forwards to populate<true, ...>, moving the
// oracle in and wrapping the options with set_extract_all(opt).
// NOTE(review): most parameter lines of these signatures are not in this
// listing.
// Full extent.
1221 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1226 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1227 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseFull>(row, full_non_target, std::move(oracle), set_extract_all(opt), full_non_target);
// Contiguous block.
1230 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1234 Index_ block_length,
1237 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseBlock>(row, block_length, std::move(oracle), set_extract_all(opt), block_start, block_length);
// Indexed subset; the size is read before the pointer is moved.
1240 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1246 auto nidx = indices_ptr->size();
1247 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseIndex>(row, nidx, std::move(oracle), set_extract_all(opt), std::move(indices_ptr));
// Oracular sparse extractors. Each forwards to populate<true, ...>, moving
// the oracle in and passing the caller's options unchanged.
// NOTE(review): most parameter lines of these signatures are not in this
// listing.
// Full extent.
1254 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1259 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1260 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseFull>(row, full_non_target, std::move(oracle), opt, full_non_target);
// Contiguous block.
1263 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1267 Index_ block_length,
1270 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseBlock>(row, block_length, std::move(oracle), opt, block_start, block_length);
// Indexed subset; the size is read before the pointer is moved.
1273 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1279 auto nidx = indices_ptr->size();
1280 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseIndex>(row, nidx, std::move(oracle), opt, std::move(indices_ptr));
TileDB-backed sparse matrix.
Definition SparseMatrix.hpp:902
SparseMatrix(const std::string &uri, std::string attribute, const SparseMatrixOptions &options)
Definition SparseMatrix.hpp:919
SparseMatrix(const std::string &uri, std::string attribute)
Definition SparseMatrix.hpp:927
SparseMatrix(const std::string &uri, std::string attribute, tiledb::Context ctx, const SparseMatrixOptions &options)
Definition SparseMatrix.hpp:910
tatami bindings for TileDB matrices.
Definition DenseMatrix.hpp:20
void serialize(Function_ fun)
Definition serialize.hpp:20
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
typename std::conditional< oracle_, OracularSparseExtractor< Value_, Index_ >, MyopicSparseExtractor< Value_, Index_ > >::type SparseExtractor
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
std::shared_ptr< const std::vector< Index_ > > VectorPtr
void process_consecutive_indices(const Index_ *indices, Index_ length, Function_ fun)
Locking for serial access.
bool sparse_extract_index
bool sparse_extract_value
Options for sparse TileDB extraction.
Definition SparseMatrix.hpp:27
size_t maximum_cache_size
Definition SparseMatrix.hpp:37
bool require_minimum_cache
Definition SparseMatrix.hpp:44