1#ifndef TATAMI_TILEDB_SPARSE_MATRIX_HPP
2#define TATAMI_TILEDB_SPARSE_MATRIX_HPP
4#include "tatami_chunked/tatami_chunked.hpp"
5#include <tiledb/tiledb>
// Internal implementation details for the TileDB-backed sparse matrix.
50namespace SparseMatrix_internal {
// Short local aliases for the shared tatami_tiledb internal types used throughout this namespace.
52typedef ::tatami_tiledb::internal::Components Components;
53typedef ::tatami_tiledb::internal::VariablyTypedDimension Dimension;
54typedef ::tatami_tiledb::internal::VariablyTypedVector CacheBuffer;
// Buffers that receive the coordinates of the queried elements along the target and
// non-target dimensions (see their use in execute_query()).
// NOTE(review): the enclosing declaration is not visible in this excerpt; these look like
// members of the `Workspace` type referenced further down - confirm.
58 CacheBuffer target_indices;
59 CacheBuffer non_target_indices;
// Runs a single TileDB read query restricted to `subarray`, writing the results into the
// buffers of `work`, and returns the number of elements reported for the target dimension.
//
// - `attribute`, `target_dimname`, `non_target_dimname`: names of the TileDB fields whose
//   data is bound to `work.values`, `work.target_indices` and `work.non_target_indices`.
// - `general_work_offset`: offset into the value / non-target index buffers.
// - `target_index_work_offset`: offset into the target index buffer.
//
// Throws std::runtime_error if the query does not finish with status COMPLETE.
// NOTE(review): several parameters (`row`, `work`, `work_length`, ...) are declared on lines
// not visible in this excerpt.
62inline size_t execute_query(
63 const Components& tdb_comp,
64 tiledb::Subarray& subarray,
65 const std::string& attribute,
67 const std::string& target_dimname,
68 const std::string& non_target_dimname,
70 size_t general_work_offset,
71 size_t target_index_work_offset,
76 tiledb::Query query(tdb_comp.ctx, tdb_comp.array);
77 query.set_subarray(subarray);
// The result layout follows the requested orientation.
78 query.set_layout(row ? TILEDB_ROW_MAJOR : TILEDB_COL_MAJOR);
// The target indices use their own offset; values and non-target indices share the general offset.
80 work.target_indices.set_data_buffer(query, target_dimname, target_index_work_offset, work_length);
// NOTE(review): the two bindings below are presumably guarded by the needs-value / needs-index
// flags on lines that are not shown - confirm.
82 work.values.set_data_buffer(query, attribute, general_work_offset, work_length);
85 work.non_target_indices.set_data_buffer(query, non_target_dimname, general_work_offset, work_length);
// Anything other than a COMPLETE status is treated as a failure.
88 if (query.submit() != tiledb::Query::Status::COMPLETE) {
89 throw std::runtime_error(
"failed to read sparse data from TileDB");
// Element count of the target dimension buffer, as reported by TileDB.
92 return query.result_buffer_elements()[target_dimname].second;
// Cache configuration consumed by the MyopicCore constructor.
// NOTE(review): a `chunk_length` field is also read by that constructor; its declaration is
// not visible in this excerpt.
99template<
typename Index_>
100struct MyopicCacheParameters {
// Size of a single slab, in elements.
102 size_t slab_size_in_elements;
// Maximum number of slabs held by the cache.
103 size_t max_slabs_in_cache;
// Constructor of the core used for non-oracular (myopic) access; stores the TileDB handles and
// cache configuration, then allocates the work buffers.
// NOTE(review): the class header and some parameters (e.g. `needs_value`, `needs_index`) are on
// lines not visible in this excerpt.
106template<
typename Index_>
110 const Components& tdb_comp,
111 const std::string& attribute,
113 Index_ target_dim_extent,
114 const std::string& target_dimname,
115 const Dimension& tdb_target_dim,
116 const std::string& non_target_dimname,
117 const Dimension& tdb_non_target_dim,
118 tiledb_datatype_t tdb_type,
// Unused here; marked [[maybe_unused]] to silence warnings.
119 [[maybe_unused]] Index_ non_target_length,
121 const MyopicCacheParameters<Index_>& cache_stats,
// Several arguments are bound to reference members, so the referents must outlive this object.
124 my_tdb_comp(tdb_comp),
125 my_attribute(attribute),
127 my_target_dim_extent(target_dim_extent),
128 my_tdb_target_dim(tdb_target_dim),
129 my_target_dimname(target_dimname),
130 my_tdb_non_target_dim(tdb_non_target_dim),
131 my_non_target_dimname(non_target_dimname),
132 my_target_chunk_length(cache_stats.chunk_length),
133 my_slab_size(cache_stats.slab_size_in_elements),
134 my_needs_value(needs_value),
135 my_needs_index(needs_index),
136 my_cache(cache_stats.max_slabs_in_cache)
// The target index buffer only needs room for one slab: fetch_raw() always writes it at
// offset 0 and compacts it immediately after each query.
139 my_work.target_indices.reset(my_tdb_target_dim.type(), my_slab_size);
// Values and non-target indices persist across slabs, so they are sized for the whole cache,
// and only allocated when actually requested.
141 size_t total_cache_size = my_slab_size * cache_stats.max_slabs_in_cache;
142 if (my_needs_value) {
143 my_work.values.reset(tdb_type, total_cache_size);
145 if (my_needs_index) {
146 my_work.non_target_indices.reset(my_tdb_non_target_dim.type(), total_cache_size);
// Data members of the myopic core.
// The following are non-owning references to objects supplied at construction.
151 const Components& my_tdb_comp;
152 const std::string& my_attribute;
// Extent of the target dimension, used to clamp the length of the last chunk.
155 Index_ my_target_dim_extent;
156 const Dimension& my_tdb_target_dim;
157 const std::string& my_target_dimname;
158 const Dimension& my_tdb_non_target_dim;
159 const std::string& my_non_target_dimname;
// Number of target-dimension elements per chunk.
161 Index_ my_target_chunk_length;
// Scratch space filled by compacting the target indices after each query; each entry is used
// as (target index, count) when building the index pointers.
166 std::vector<std::pair<Index_, Index_> > my_counts;
// Per-slab index pointers: entry k is the start of the k-th target element within the slab.
// NOTE(review): this is presumably a member of the nested `Slab` type, whose header is not visible.
170 std::vector<size_t> indptrs;
// Offset into the work buffers that is handed to the next newly created slab.
172 size_t my_offset = 0;
173 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
// Locates the data for target element `i`, loading its chunk through the cache when needed.
// `configure(subarray, rowdex)` lets the caller add the non-target ranges to the query.
// Returns a pair of (start position in the work buffers, number of elements) for `i`.
176 template<
class Configure_>
177 std::pair<size_t, size_t> fetch_raw(Index_ i, Configure_ configure) {
// Split `i` into the chunk that contains it and its position inside that chunk.
178 Index_ chunk = i / my_target_chunk_length;
179 Index_ index = i % my_target_chunk_length;
181 const auto& info = my_cache.find(
// Each new slab is assigned the next unused slab-sized region of the work buffers.
185 output.offset = my_offset;
186 my_offset += my_slab_size;
// Populates the slab for chunk `id` from TileDB.
189 [&](Index_ id, Slab& contents) ->
void {
190 Index_ chunk_start =
id * my_target_chunk_length;
// The last chunk may be shorter than the nominal chunk length.
191 Index_ chunk_length = std::min(my_target_dim_extent - chunk_start, my_target_chunk_length);
193 size_t num_nonzero = 0;
195 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
// NOTE(review): `rowdex` is defined on a line not visible here; the target dimension is added
// at position `1 - rowdex` and the caller configures position `rowdex`.
197 my_tdb_target_dim.add_range(subarray, 1 - rowdex, chunk_start, chunk_length);
198 configure(subarray, rowdex);
199 num_nonzero = execute_query(
205 my_non_target_dimname,
215 auto& indptrs = contents.indptrs;
217 indptrs.resize(chunk_length + 1);
// Collapse the target indices into `my_counts`, whose entries are read below as
// (target index, count).
220 my_work.target_indices.compact(0, num_nonzero, my_tdb_target_dim, my_counts);
// Store each count one position past its element so that the running sum below yields
// start offsets.
221 for (
const auto& cnts : my_counts) {
222 indptrs[cnts.first - chunk_start + 1] = cnts.second;
// Running sum converts counts into index pointers.
224 for (Index_ i = 1; i <= chunk_length; ++i) {
225 indptrs[i] += indptrs[i - 1];
// Translate the slab-relative pointers into a position within the shared work buffers.
231 auto start = info.indptrs[index];
232 return std::make_pair(info.offset + start, info.indptrs[index + 1] - start);
// Fetches target element `i`, restricting the non-target dimension to the contiguous range
// described by `block_start` and `block_length`.
// Returns the same (start, length) pair as fetch_raw().
236 std::pair<size_t, size_t> fetch_block(Index_ i, Index_ block_start, Index_ block_length) {
// Query configuration callback: adds the block as a single range on dimension `rowdex`.
239 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
240 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, block_length);
// Fetches target element `i`, restricting the non-target dimension to the supplied `indices`.
// Returns the same (start, length) pair as fetch_raw().
245 std::pair<size_t, size_t> fetch_indices(Index_ i,
const std::vector<Index_>& indices) {
// Query configuration callback.
248 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
// Invoked with a start `s` and length `l`, adding one range per invocation.
// NOTE(review): the caller of this inner lambda is not visible; presumably it groups
// consecutive indices into runs - confirm.
252 [&](Index_ s, Index_ l) ->
void {
253 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Read-only accessors used by the extractor classes.

// Work buffers holding the fetched values and indices (body not visible in this excerpt).
261 const Workspace& get_workspace()
const {
// Whether values were requested at construction.
265 bool get_needs_value()
const {
266 return my_needs_value;
// Whether non-target indices were requested at construction.
269 bool get_needs_index()
const {
270 return my_needs_index;
// TileDB dimension object for the non-target dimension.
273 const Dimension& get_tdb_non_target_dim()
const {
274 return my_tdb_non_target_dim;
// Cache configuration consumed by the OracularCore constructor.
// NOTE(review): a `chunk_length` field is also read by that constructor; its declaration is
// not visible in this excerpt.
286template<
typename Index_>
287struct OracularCacheParameters {
// Capacity of the cache, expressed as a number of elements; used to size the work buffers.
289 size_t max_cache_size_in_elements;
// Constructor of the core used for oracle-driven access; stores the TileDB handles and cache
// configuration, then allocates the work buffers.
// NOTE(review): the class header and some parameters (e.g. `oracle`, `needs_value`,
// `needs_index`) are on lines not visible in this excerpt.
292template<
typename Index_>
296 const Components& tdb_comp,
297 const std::string& attribute,
299 Index_ target_dim_extent,
300 const std::string& target_dimname,
301 const Dimension& tdb_target_dim,
302 const std::string& non_target_dimname,
303 const Dimension& tdb_non_target_dim,
304 tiledb_datatype_t tdb_type,
305 Index_ non_target_length,
307 const OracularCacheParameters<Index_>& cache_stats,
// Several arguments are bound to reference members, so the referents must outlive this object.
310 my_tdb_comp(tdb_comp),
311 my_attribute(attribute),
313 my_target_dim_extent(target_dim_extent),
314 my_tdb_target_dim(tdb_target_dim),
315 my_target_dimname(target_dimname),
316 my_tdb_non_target_dim(tdb_non_target_dim),
317 my_non_target_dimname(non_target_dimname),
318 my_target_chunk_length(cache_stats.chunk_length),
// Upper bound on the number of elements in one slab: every non-target position for every
// target element of a chunk. The cast avoids doing the multiplication in Index_.
319 my_max_slab_size(static_cast<size_t>(non_target_length) * my_target_chunk_length),
320 my_needs_value(needs_value),
321 my_needs_index(needs_index),
322 my_cache(std::move(oracle), cache_stats.max_cache_size_in_elements)
// All three buffers are sized to the full cache capacity; values and non-target indices are
// only allocated when requested.
324 my_work.target_indices.reset(my_tdb_target_dim.type(), cache_stats.max_cache_size_in_elements);
325 if (my_needs_value) {
326 my_work.values.reset(tdb_type, cache_stats.max_cache_size_in_elements);
328 if (my_needs_index) {
329 my_work.non_target_indices.reset(my_tdb_non_target_dim.type(), cache_stats.max_cache_size_in_elements);
// Data members of the oracular core.
// The following are non-owning references to objects supplied at construction.
334 const Components& my_tdb_comp;
335 const std::string& my_attribute;
// Extent of the target dimension, used to clamp the length of the last chunk.
338 Index_ my_target_dim_extent;
339 const Dimension& my_tdb_target_dim;
340 const std::string& my_target_dimname;
341 const Dimension& my_tdb_non_target_dim;
342 const std::string& my_non_target_dimname;
// Number of target-dimension elements per chunk.
344 Index_ my_target_chunk_length;
// Upper bound on the element count of a single slab (non-target length times chunk length).
345 size_t my_max_slab_size;
// Scratch space filled by compacting the target indices after each query; each entry is used
// as (target index, count) when building the index pointers.
349 std::vector<std::pair<Index_, Index_> > my_counts;
// Per-slab index pointers; the final entry is the slab's total element count.
// NOTE(review): this is presumably a member of the nested `Slab` type, whose header is not visible.
353 std::vector<size_t> indptrs;
355 tatami_chunked::OracularVariableSlabCache<Index_, Index_, Slab, size_t> my_cache;
// Sorts `indices` in ascending order of `field(entry)`.
// `field` maps a (chunk id, slab index) pair to the key to order by.
358 template<
class Function_>
359 static void sort_by_field(std::vector<std::pair<Index_, size_t> >& indices, Function_ field) {
// Order two entries by their extracted key.
360 auto comp = [&field](
const std::pair<Index_, size_t>& l,
const std::pair<Index_, size_t>& r) ->
bool {
361 return field(l) < field(r);
// Skip the sort entirely when the input is already ordered.
363 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
364 std::sort(indices.begin(), indices.end(), comp);
// Fetches the data for the next target element predicted by the oracle.
// `i` is ignored (hence [[maybe_unused]]); the element to serve comes from the cache.
// `configure(subarray, rowdex)` lets the caller add the non-target ranges to the query.
// Returns a pair of (start position in the work buffers, number of elements).
368 template<
class Configure_>
369 std::pair<size_t, size_t> fetch_raw([[maybe_unused]] Index_ i, Configure_ configure) {
370 auto info = my_cache.next(
// Maps a target index to (chunk id, position within that chunk).
371 [&](Index_ current) -> std::pair<Index_, Index_> {
372 return std::pair<Index_, Index_>(current / my_target_chunk_length, current % my_target_chunk_length);
// Size estimate for a slab that has not been loaded yet: the worst case.
374 [&](Index_) ->
size_t {
375 return my_max_slab_size;
// Actual size of a loaded slab: the last index pointer is its element count.
377 [&](Index_,
const Slab& slab) ->
size_t {
378 return slab.indptrs.back();
// Refreshes the cache: keeps the slabs in `to_reuse` and loads those in `to_populate`.
// Both vectors hold (chunk id, index into `all_slabs`) pairs.
383 [&](std::vector<std::pair<Index_, size_t> >& to_populate, std::vector<std::pair<Index_, size_t> >& to_reuse, std::vector<Slab>& all_slabs) ->
void {
// Step 1: pack the reused slabs at the front of the work buffers. Processing them in
// order of increasing offset means each one only ever moves towards the front.
386 sort_by_field(to_reuse, [&](
const std::pair<Index_, size_t>& x) ->
size_t {
return all_slabs[x.second].offset; });
387 size_t running_offset = 0;
388 for (
auto& x : to_reuse) {
389 auto& reused_slab = all_slabs[x.second];
390 auto& cur_offset = reused_slab.offset;
391 auto num_nonzero = reused_slab.indptrs.back();
// Only move data when the slab is not already in its packed position.
392 if (cur_offset != running_offset) {
393 if (my_needs_value) {
394 my_work.values.shift(cur_offset, num_nonzero, running_offset);
396 if (my_needs_index) {
397 my_work.non_target_indices.shift(cur_offset, num_nonzero, running_offset);
399 cur_offset = running_offset;
401 running_offset += num_nonzero;
// Step 2: load the new slabs with one query, in order of increasing chunk id.
409 sort_by_field(to_populate, [](
const std::pair<Index_, size_t>& x) -> Index_ {
return x.first; });
411 size_t num_nonzero = 0;
413 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
// NOTE(review): `rowdex` is defined on a line not visible here.
415 configure(subarray, rowdex);
// Adjacent chunks are merged into a single range on the target dimension.
// NOTE(review): front() assumes `to_populate` is non-empty; any guard is not visible - confirm.
417 Index_ run_chunk_id = to_populate.front().first;
418 Index_ run_chunk_start = run_chunk_id * my_target_chunk_length;
419 Index_ run_length = std::min(my_target_dim_extent - run_chunk_start, my_target_chunk_length);
// The target dimension sits at the position opposite to `rowdex`.
421 int dimdex = 1 - rowdex;
422 for (
size_t ci = 1, cend = to_populate.size(); ci < cend; ++ci) {
423 Index_ current_chunk_id = to_populate[ci].first;
424 Index_ current_chunk_start = current_chunk_id * my_target_chunk_length;
// A gap ends the current run: emit it as one range and start a new run here.
// NOTE(review): the comparison is against the first chunk id of the run, which is not
// visibly advanced when the run is extended, so a run longer than two chunks looks like it
// is split into several ranges. Presumably `run_length` is also reset on a line not shown.
// Confirm against the full source.
426 if (current_chunk_id - run_chunk_id > 1) {
427 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
428 run_chunk_id = current_chunk_id;
429 run_chunk_start = current_chunk_start;
// Extend the run by this chunk, clamping the last chunk to the dimension extent.
433 run_length += std::min(my_target_dim_extent - current_chunk_start, my_target_chunk_length);
// Emit the final run.
436 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
437 num_nonzero = execute_query(
443 my_non_target_dimname,
// Worst-case element count across all slabs being loaded.
447 to_populate.size() * my_max_slab_size,
// Collapse the target indices into `my_counts`, whose entries are read below as
// (target index, count).
453 my_work.target_indices.compact(running_offset, num_nonzero, my_tdb_target_dim, my_counts);
// Step 3: walk the counts once, handing each slab the entries that fall inside its chunk.
455 auto cIt = my_counts.begin(), cEnd = my_counts.end();
456 for (
auto& si : to_populate) {
457 auto& populate_slab = all_slabs[si.second];
458 populate_slab.offset = running_offset;
460 Index_ chunk_start = si.first * my_target_chunk_length;
461 Index_ chunk_length = std::min(my_target_dim_extent - chunk_start, my_target_chunk_length);
462 Index_ chunk_end = chunk_start + chunk_length;
// clear() followed by resize() zero-fills the index pointers of a recycled slab.
464 auto& slab_indptrs = populate_slab.indptrs;
465 slab_indptrs.clear();
466 slab_indptrs.resize(chunk_length + 1);
// NOTE(review): the iterator increment is presumably on a line not shown here.
468 while (cIt != cEnd && cIt->first < chunk_end) {
469 slab_indptrs[cIt->first - chunk_start + 1] = cIt->second;
// Running sum converts counts into index pointers.
473 for (Index_ i = 1; i <= chunk_length; ++i) {
474 slab_indptrs[i] += slab_indptrs[i - 1];
// The next slab starts right after this one.
476 running_offset += slab_indptrs.back();
// `info` pairs a slab pointer with the position of the requested element inside that slab.
481 const auto& indptrs = info.first->indptrs;
482 auto start = indptrs[info.second];
483 return std::make_pair(info.first->offset + start, indptrs[info.second + 1] - start);
// Fetches the next target element, restricting the non-target dimension to the contiguous
// range described by `block_start` and `block_length`.
// Returns the same (start, length) pair as fetch_raw().
487 std::pair<size_t, size_t> fetch_block(Index_ i, Index_ block_start, Index_ block_length) {
// Query configuration callback: adds the block as a single range on dimension `rowdex`.
490 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
491 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, block_length);
// Fetches the next target element, restricting the non-target dimension to the supplied `indices`.
// Returns the same (start, length) pair as fetch_raw().
496 std::pair<size_t, size_t> fetch_indices(Index_ i,
const std::vector<Index_>& indices) {
// Query configuration callback.
499 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
// Invoked with a start `s` and length `l`, adding one range per invocation.
// NOTE(review): the caller of this inner lambda is not visible; presumably it groups
// consecutive indices into runs - confirm.
503 [&](Index_ s, Index_ l) ->
void {
504 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Read-only accessors used by the extractor classes.

// Work buffers holding the fetched values and indices (body not visible in this excerpt).
512 const Workspace& get_workspace()
const {
// Whether values were requested at construction.
516 bool get_needs_value()
const {
517 return my_needs_value;
// Whether non-target indices were requested at construction.
520 bool get_needs_index()
const {
521 return my_needs_index;
// TileDB dimension object for the non-target dimension.
524 const Dimension& get_tdb_non_target_dim()
const {
525 return my_tdb_non_target_dim;
// Selects the core implementation at compile time: OracularCore when `oracle_` is true,
// MyopicCore otherwise.
529template<
bool oracle_,
typename Index_>
530using SparseCore =
typename std::conditional<oracle_, OracularCore<Index_>, MyopicCore<Index_> >::type;
// Selects the cache parameter struct matching the core chosen by SparseCore for the same `oracle_`.
532template<
bool oracle_,
typename Index_>
533using CacheParameters =
typename std::conditional<oracle_, OracularCacheParameters<Index_>, MyopicCacheParameters<Index_> >::type;
// Copies `work_length` elements starting at `work_start` from the work buffers into the
// caller's buffers and describes them in `output`.
// NOTE(review): the function name is taken from the call sites further down; the signature line,
// the remaining parameters and the declaration of `output` are not visible in this excerpt.
539template<
typename Value_,
typename Index_>
541 const Workspace& work,
544 const Dimension& non_target_dim,
551 output.
number = work_length;
// NOTE(review): the two copies below are presumably guarded by the needs-value / needs-index
// flags passed by the callers - confirm.
553 work.values.copy(work_start, work_length, vbuffer);
554 output.
value = vbuffer;
// The non-target dimension object is passed along with the destination buffer of indices.
557 work.non_target_indices.copy(work_start, work_length, non_target_dim, ibuffer);
558 output.
index = ibuffer;
// Sparse extractor over the full extent of the non-target dimension.
// NOTE(review): the class header is not visible; this is presumably `SparseFull`, the class
// handed to populate() for full sparse extraction - confirm.
563template<
bool oracle_,
typename Value_,
typename Index_>
567 const Components& tdb_comp,
568 const std::string& attribute,
570 Index_ target_dim_extent,
571 const std::string& target_dimname,
572 const Dimension& tdb_target_dim,
573 const std::string& non_target_dimname,
574 const Dimension& tdb_non_target_dim,
575 tiledb_datatype_t tdb_type,
// Extent of the non-target dimension; used as the block length in the fetch below.
577 Index_ non_target_dim,
578 const CacheParameters<oracle_, Index_>& cache_parameters,
597 my_non_target_dim(non_target_dim)
// Full extraction is a block fetch starting at 0 and spanning the whole non-target dimension.
601 auto info = my_core.fetch_block(i, 0, my_non_target_dim);
602 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
606 SparseCore<oracle_, Index_> my_core;
607 Index_ my_non_target_dim;
// Sparse extractor over a contiguous block of the non-target dimension.
// NOTE(review): the class header is not visible; this is presumably `SparseBlock` - confirm.
610template<
bool oracle_,
typename Value_,
typename Index_>
614 const Components& tdb_comp,
615 const std::string& attribute,
617 Index_ target_dim_extent,
618 const std::string& target_dimname,
619 const Dimension& tdb_target_dim,
620 const std::string& non_target_dimname,
621 const Dimension& tdb_non_target_dim,
622 tiledb_datatype_t tdb_type,
626 const CacheParameters<oracle_, Index_>& cache_parameters,
645 my_block_start(block_start),
646 my_block_length(block_length)
// Fetch the stored block and hand the resulting range to fill_sparse_range().
650 auto info = my_core.fetch_block(i, my_block_start, my_block_length);
651 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
655 SparseCore<oracle_, Index_> my_core;
656 Index_ my_block_start, my_block_length;
// Sparse extractor over an arbitrary subset of indices on the non-target dimension.
// NOTE(review): the class header is not visible; this is presumably `SparseIndex` - confirm.
659template<
bool oracle_,
typename Value_,
typename Index_>
663 const Components& tdb_comp,
664 const std::string& attribute,
666 Index_ target_dim_extent,
667 const std::string& target_dimname,
668 const Dimension& tdb_target_dim,
669 const std::string& non_target_dimname,
670 const Dimension& tdb_non_target_dim,
671 tiledb_datatype_t tdb_type,
674 const CacheParameters<oracle_, Index_>& cache_parameters,
// The extractor keeps the index pointer it is given, moving it into the member.
693 my_indices_ptr(std::move(indices_ptr))
// Fetch the requested indices and hand the resulting range to fill_sparse_range().
697 auto info = my_core.fetch_indices(i, *my_indices_ptr);
698 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
702 SparseCore<oracle_, Index_> my_core;
// Dense extractor over the full extent of the non-target dimension: fetches the sparse
// contents and scatters them into a zero-filled output buffer.
// NOTE(review): the class header is not visible; this is presumably `DenseFull` - confirm.
710template<
bool oracle_,
typename Value_,
typename Index_>
714 const Components& tdb_comp,
715 const std::string& attribute,
717 Index_ target_dim_extent,
718 const std::string& target_dimname,
719 const Dimension& tdb_target_dim,
720 const std::string& non_target_dimname,
721 const Dimension& tdb_non_target_dim,
722 tiledb_datatype_t tdb_type,
724 Index_ non_target_dim_extent,
725 const CacheParameters<oracle_, Index_>& cache_parameters,
// These flags are accepted but not used by this constructor as far as is visible; fetch()
// copies both values and indices unconditionally.
726 [[maybe_unused]]
bool needs_value,
727 [[maybe_unused]]
bool needs_index) :
738 non_target_dim_extent,
744 my_non_target_dim_extent(non_target_dim_extent),
// Holding buffers are sized for the largest possible result: one entry per non-target position.
745 my_holding_value(my_non_target_dim_extent),
746 my_holding_index(my_non_target_dim_extent)
// Writes the dense contents of target element `i` into `buffer`, which must hold at least
// `my_non_target_dim_extent` entries.
749 const Value_* fetch(Index_ i, Value_* buffer) {
750 auto info = my_core.fetch_block(i, 0, my_non_target_dim_extent);
751 const auto& work = my_core.get_workspace();
// Copy the values and indices out of the work buffers into the holding buffers.
752 work.values.copy(info.first, info.second, my_holding_value.data());
753 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
// Zero the output, then scatter each fetched value to its position.
754 std::fill_n(buffer, my_non_target_dim_extent, 0);
// Note that this loop variable shadows the parameter `i`.
755 for (
size_t i = 0; i < info.second; ++i) {
756 buffer[my_holding_index[i]] = my_holding_value[i];
762 SparseCore<oracle_, Index_> my_core;
763 Index_ my_non_target_dim_extent;
764 std::vector<Value_> my_holding_value;
765 std::vector<Index_> my_holding_index;
// Dense extractor over a contiguous block of the non-target dimension: fetches the sparse
// contents and scatters them into a zero-filled output buffer of block length.
// NOTE(review): the class header is not visible; this is presumably `DenseBlock` - confirm.
768template<
bool oracle_,
typename Value_,
typename Index_>
772 const Components& tdb_comp,
773 const std::string& attribute,
775 Index_ target_dim_extent,
776 const std::string& target_dimname,
777 const Dimension& tdb_target_dim,
778 const std::string& non_target_dimname,
779 const Dimension& tdb_non_target_dim,
780 tiledb_datatype_t tdb_type,
784 const CacheParameters<oracle_, Index_>& cache_parameters,
// These flags are accepted but not used by this constructor as far as is visible; fetch()
// copies both values and indices unconditionally.
785 [[maybe_unused]]
bool needs_value,
786 [[maybe_unused]]
bool needs_index) :
803 my_block_start(block_start),
804 my_block_length(block_length),
// Holding buffers are sized for the largest possible result: one entry per block position.
805 my_holding_value(block_length),
806 my_holding_index(block_length)
// Writes the dense contents of the block for target element `i` into `buffer`, which must
// hold at least `my_block_length` entries.
809 const Value_* fetch(Index_ i, Value_* buffer) {
810 auto info = my_core.fetch_block(i, my_block_start, my_block_length);
811 const auto& work = my_core.get_workspace();
// Copy the values and indices out of the work buffers into the holding buffers.
812 work.values.copy(info.first, info.second, my_holding_value.data());
813 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
814 std::fill_n(buffer, my_block_length, 0);
// Note that this loop variable shadows the parameter `i`.
815 for (
size_t i = 0; i < info.second; ++i) {
// Subtracting the block start turns the stored index into a block-relative position.
816 buffer[my_holding_index[i] - my_block_start] = my_holding_value[i];
822 SparseCore<oracle_, Index_> my_core;
823 Index_ my_block_start, my_block_length;
824 std::vector<Value_> my_holding_value;
825 std::vector<Index_> my_holding_index;
// Dense extractor over an arbitrary subset of indices on the non-target dimension: fetches the
// sparse contents and scatters them into a zero-filled output buffer with one entry per index.
// NOTE(review): the class header is not visible; this is presumably `DenseIndex` - confirm.
828template<
bool oracle_,
typename Value_,
typename Index_>
832 const Components& tdb_comp,
833 const std::string& attribute,
835 Index_ target_dim_extent,
836 const std::string& target_dimname,
837 const Dimension& tdb_target_dim,
838 const std::string& non_target_dimname,
839 const Dimension& tdb_non_target_dim,
840 tiledb_datatype_t tdb_type,
843 const CacheParameters<oracle_, Index_>& cache_parameters,
// These flags are accepted but not used by this constructor as far as is visible; fetch()
// copies both values and indices unconditionally.
844 [[maybe_unused]]
bool needs_value,
845 [[maybe_unused]]
bool needs_index) :
862 my_indices_ptr(std::move(indices_ptr)),
// Holding buffers are sized for the largest possible result: one entry per requested index.
863 my_holding_value(my_indices_ptr->size()),
864 my_holding_index(my_indices_ptr->size())
// Build a lookup from (index - first index) to the position of that index in the request.
// Sizing by `back() - front() + 1` assumes the indices are sorted in increasing order -
// TODO confirm against the caller contract.
866 const auto& indices = *my_indices_ptr;
867 if (!indices.empty()) {
868 auto idx_start = indices.front();
869 my_remapping.resize(indices.back() - idx_start + 1);
870 for (size_t j = 0, end = indices.size(); j < end; ++j) {
871 my_remapping[indices[j] - idx_start] = j;
// Writes the dense contents of the requested indices for target element `i` into `buffer`,
// which must hold at least `indices.size()` entries.
876 const Value_* fetch(Index_ i, Value_* buffer) {
877 const auto& indices = *my_indices_ptr;
// With no indices requested there is nothing to fetch or fill in this branch.
879 if (!indices.empty()) {
880 auto info = my_core.fetch_indices(i, indices);
881 const auto& work = my_core.get_workspace();
882 work.values.copy(info.first, info.second, my_holding_value.data());
883 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
884 auto idx_start = indices.front();
885 std::fill_n(buffer, indices.size(), 0);
// Note that this loop variable shadows the parameter `i`.
886 for (
size_t i = 0; i < info.second; ++i) {
// Map each stored index back to its position in the request.
887 buffer[my_remapping[my_holding_index[i] - idx_start]] = my_holding_value[i];
895 SparseCore<oracle_, Index_> my_core;
897 std::vector<Index_> my_remapping;
898 std::vector<Value_> my_holding_value;
899 std::vector<Index_> my_holding_index;
// TileDB-backed sparse matrix class template.
// NOTE(review): the class header and constructor signatures are not visible in this excerpt;
// only the constructor bodies remain.
921template<
typename Value_,
typename Index_>
// Variant taking a caller-supplied TileDB context, which is moved into initialize().
931 initialize(uri, std::move(ctx), options);
// Variant without a context: `false` is passed as a placeholder so that initialize()
// takes its no-context branch.
940 initialize(uri,
false, options);
// Opens the TileDB array at `uri` and caches its schema information in the members.
// `ctx` is either a tiledb::Context to use, or any other type to signal that no context was supplied.
// Throws std::runtime_error if the array is not sparse, lacks the requested attribute, or
// does not have exactly two dimensions.
950 template<
class PossibleContext_>
951 void initialize(
const std::string& uri, PossibleContext_ ctx,
const SparseMatrixOptions& options) {
// Compile-time choice between constructing the components with or without a context.
957 if constexpr(std::is_same<PossibleContext_, tiledb::Context>::value) {
958 return new SparseMatrix_internal::Components(std::move(ctx), uri);
960 return new SparseMatrix_internal::Components(uri);
// Custom deleter for the components pointer (body not visible in this excerpt).
963 [](SparseMatrix_internal::Components* ptr) {
971 auto schema = my_tdb_comp->array.schema();
972 if (schema.array_type() != TILEDB_SPARSE) {
973 throw std::runtime_error(
"TileDB array should be sparse");
975 my_cell_order = schema.cell_order();
980 if (!schema.has_attribute(my_attribute)) {
981 throw std::runtime_error(
"no attribute '" + my_attribute +
"' is present in the TileDB array");
983 auto attr = schema.attribute(my_attribute);
984 my_tdb_type = attr.type();
986 tiledb::Domain domain = schema.domain();
987 if (domain.ndim() != 2) {
988 throw std::runtime_error(
"TileDB array should have exactly two dimensions");
// Record the name, extent and tile size of the first dimension; nrow() reports this extent.
991 tiledb::Dimension first_dim = domain.dimension(0);
992 my_first_dimname = first_dim.name();
993 my_tdb_first_dim.reset(first_dim);
994 Index_ first_extent = my_tdb_first_dim.extent<Index_>();
995 Index_ first_tile = my_tdb_first_dim.tile<Index_>();
996 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(first_extent, first_tile);
// Same for the second dimension; ncol() reports this extent.
998 tiledb::Dimension second_dim = domain.dimension(1);
999 my_second_dimname = second_dim.name();
1000 my_tdb_second_dim.reset(second_dim);
1001 Index_ second_extent = my_tdb_second_dim.extent<Index_>();
1002 Index_ second_tile = my_tdb_second_dim.tile<Index_>();
1003 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(second_extent, second_tile);
// Ceiling division: the number of tiles along each dimension. The first dimension is
// preferred when the second dimension spans no more tiles than the first does.
1006 auto tiles_per_firstdim = (second_extent / second_tile) + (second_extent % second_tile > 0);
1007 auto tiles_per_seconddim = (first_extent / first_tile) + (first_extent % first_tile > 0);
1008 my_prefer_firstdim = tiles_per_firstdim <= tiles_per_seconddim;
// Data members of SparseMatrix, filled in by initialize().
// Shared handle to the TileDB components; its `array` member is used to read the schema.
1013 std::shared_ptr<SparseMatrix_internal::Components> my_tdb_comp;
// Cell order and attribute type as reported by the array schema.
1014 tiledb_layout_t my_cell_order;
1015 tiledb_datatype_t my_tdb_type;
// Name of the attribute holding the matrix values.
1017 std::string my_attribute;
// Cache budget in bytes, and whether the cache must hold at least one full slab.
// NOTE(review): presumably set from SparseMatrixOptions on lines not shown - confirm.
1018 size_t my_cache_size_in_bytes;
1019 bool my_require_minimum_cache;
// Names, TileDB dimension objects and chunking statistics of the two dimensions.
1021 std::string my_first_dimname, my_second_dimname;
1022 SparseMatrix_internal::Dimension my_tdb_first_dim, my_tdb_second_dim;
1023 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
// Whether access along the first dimension is preferred; reported by prefer_rows().
1025 bool my_prefer_firstdim;
// Dimension and preference accessors.

// Number of rows: the extent of the first TileDB dimension.
1028 Index_ nrow_internal()
const {
1029 return my_firstdim_stats.dimension_extent;
// Number of columns: the extent of the second TileDB dimension.
1032 Index_ ncol_internal()
const {
1033 return my_seconddim_stats.dimension_extent;
// These two forward to the *_internal() versions.
1037 Index_ nrow()
const {
1038 return nrow_internal();
1041 Index_ ncol()
const {
1042 return ncol_internal();
// Bodies of the next two functions are not visible in this excerpt.
1045 bool is_sparse()
const {
1049 double is_sparse_proportion()
const {
// Row access is preferred when the first dimension was chosen in initialize().
1053 bool prefer_rows()
const {
1054 return my_prefer_firstdim;
// Same preference expressed as 0.0 or 1.0.
1057 double prefer_rows_proportion()
const {
1058 return static_cast<double>(my_prefer_firstdim);
// The unnamed argument is ignored: an oracle is used whenever any cache space is available.
1061 bool uses_oracle(
bool)
const {
1064 return my_cache_size_in_bytes > 0;
// Common factory behind every dense()/sparse() overload: works out the cache parameters for
// the requested orientation and constructs the extractor `Extractor_<oracle_, Value_, Index_>`,
// returned as a pointer to `Interface_<Value_, Index_>`.
// NOTE(review): part of the template header and parameter list is not visible in this excerpt.
1070 template<
typename,
typename>
class Interface_,
1071 template<
bool,
typename,
typename>
class Extractor_,
1074 std::unique_ptr<Interface_<Value_, Index_> > populate(
1076 Index_ non_target_length,
// Pick the target / non-target flavour of every per-dimension member based on `row`.
1081 const auto& target_dim_stats = (row ? my_firstdim_stats : my_seconddim_stats);
1082 const auto& target_dimname = (row ? my_first_dimname : my_second_dimname);
1083 const auto& non_target_dimname = (row ? my_second_dimname : my_first_dimname);
1084 const auto& tdb_target_dim = (row ? my_tdb_first_dim : my_tdb_second_dim);
1085 const auto& tdb_non_target_dim = (row ? my_tdb_second_dim : my_tdb_first_dim);
// Bytes needed per stored element, accumulated from the type sizes of the fields involved.
// NOTE(review): the first two additions are presumably conditional on whether values and
// indices are requested; the guards are not visible - confirm.
1087 size_t nonzero_size = 0;
1089 nonzero_size += ::tatami_tiledb::internal::determine_type_size(my_tdb_type);
1092 nonzero_size += ::tatami_tiledb::internal::determine_type_size(tdb_non_target_dim.type());
1095 if constexpr(oracle_) {
// The oracular core sizes its target index buffer to the full cache capacity, so its type
// size is counted per element as well.
1099 nonzero_size += ::tatami_tiledb::internal::determine_type_size(tdb_target_dim.type());
1101 SparseMatrix_internal::OracularCacheParameters<Index_> cache_params;
1102 cache_params.max_cache_size_in_elements = my_cache_size_in_bytes / nonzero_size;
// A chunk length of 1 is used when the requested orientation matches the array's cell order;
// otherwise the tile-derived chunk length is used.
1115 cache_params.chunk_length = (row == (my_cell_order == TILEDB_ROW_MAJOR) ? 1 : target_dim_stats.chunk_length);
// Worst-case slab size; the cast avoids doing the multiplication in Index_.
1123 size_t max_slab_size =
static_cast<size_t>(non_target_length) * cache_params.chunk_length;
// Either enlarge the cache to hold one full slab, or fall back to a cache of
// `non_target_length` elements with a chunk length of 1.
1124 if (my_require_minimum_cache) {
1125 cache_params.max_cache_size_in_elements = std::max(cache_params.max_cache_size_in_elements, max_slab_size);
1126 }
else if (cache_params.max_cache_size_in_elements < max_slab_size) {
1127 cache_params.max_cache_size_in_elements = non_target_length;
1128 cache_params.chunk_length = 1;
1131 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
1135 target_dim_stats.dimension_extent,
1142 std::forward<Args_>(args)...,
// Myopic case: derive the slab statistics from the chunk layout and the cache budget.
1149 tatami_chunked::SlabCacheStats raw_params(
1150 target_dim_stats.chunk_length,
1152 target_dim_stats.num_chunks,
1153 my_cache_size_in_bytes,
1155 my_require_minimum_cache
1163 SparseMatrix_internal::MyopicCacheParameters<Index_> cache_params;
// Use the computed statistics when at least one slab fits.
1164 if (raw_params.max_slabs_in_cache > 0) {
1165 cache_params.chunk_length = target_dim_stats.chunk_length;
1166 cache_params.slab_size_in_elements = raw_params.slab_size_in_elements;
1167 cache_params.max_slabs_in_cache = raw_params.max_slabs_in_cache;
// Otherwise use a single slab covering one target element.
// NOTE(review): the `else` separating these branches is on a line not shown.
1169 cache_params.chunk_length = 1;
1170 cache_params.slab_size_in_elements = non_target_length;
1171 cache_params.max_slabs_in_cache = 1;
1174 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
1178 target_dim_stats.dimension_extent,
1185 std::forward<Args_>(args)...,
// Myopic dense extractors. All three forward to populate() with `oracle_ = false`, passing
// `false` in the oracle slot and the options after set_extract_all().

// Full extent of the non-target dimension.
1205 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
// The non-target dimension is the columns when extracting rows, and vice versa.
1206 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1207 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseFull>(row, full_non_target,
false, set_extract_all(opt), full_non_target);
// Contiguous block of the non-target dimension.
1210 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1211 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseBlock>(row, block_length,
false, set_extract_all(opt), block_start, block_length);
// Indexed subset (signature not visible in this excerpt). The size is read before the pointer
// is moved into populate().
1215 auto nidx = indices_ptr->size();
1216 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseIndex>(row, nidx,
false, set_extract_all(opt), std::move(indices_ptr));
// Myopic sparse extractors. All three forward to populate() with `oracle_ = false`, passing
// `false` in the oracle slot and the caller's options unchanged.

// Full extent of the non-target dimension.
1223 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
// The non-target dimension is the columns when extracting rows, and vice versa.
1224 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1225 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseFull>(row, full_non_target,
false, opt, full_non_target);
// Contiguous block of the non-target dimension.
1228 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1229 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseBlock>(row, block_length,
false, opt, block_start, block_length);
// Indexed subset (signature not visible in this excerpt). The size is read before the pointer
// is moved into populate().
1233 auto nidx = indices_ptr->size();
1234 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseIndex>(row, nidx,
false, opt, std::move(indices_ptr));
// Oracular dense extractors. All three forward to populate() with `oracle_ = true`, moving the
// oracle in and passing the options after set_extract_all().
// NOTE(review): most of the parameter lists are on lines not visible in this excerpt.

// Full extent of the non-target dimension.
1241 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
// The non-target dimension is the columns when extracting rows, and vice versa.
1246 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1247 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseFull>(row, full_non_target, std::move(oracle), set_extract_all(opt), full_non_target);
// Contiguous block of the non-target dimension.
1250 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1254 Index_ block_length,
1257 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseBlock>(row, block_length, std::move(oracle), set_extract_all(opt), block_start, block_length);
// Indexed subset. The size is read before the pointer is moved into populate().
1260 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1266 auto nidx = indices_ptr->size();
1267 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseIndex>(row, nidx, std::move(oracle), set_extract_all(opt), std::move(indices_ptr));
// Oracular sparse extractors. All three forward to populate() with `oracle_ = true`, moving the
// oracle in and passing the caller's options unchanged.
// NOTE(review): most of the parameter lists are on lines not visible in this excerpt.

// Full extent of the non-target dimension.
1274 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
// The non-target dimension is the columns when extracting rows, and vice versa.
1279 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1280 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseFull>(row, full_non_target, std::move(oracle), opt, full_non_target);
// Contiguous block of the non-target dimension.
1283 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1287 Index_ block_length,
1290 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseBlock>(row, block_length, std::move(oracle), opt, block_start, block_length);
// Indexed subset. The size is read before the pointer is moved into populate().
1293 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1299 auto nidx = indices_ptr->size();
1300 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseIndex>(row, nidx, std::move(oracle), opt, std::move(indices_ptr));
TileDB-backed sparse matrix.
Definition SparseMatrix.hpp:922
SparseMatrix(const std::string &uri, std::string attribute, const SparseMatrixOptions &options)
Definition SparseMatrix.hpp:939
SparseMatrix(const std::string &uri, std::string attribute)
Definition SparseMatrix.hpp:947
SparseMatrix(const std::string &uri, std::string attribute, tiledb::Context ctx, const SparseMatrixOptions &options)
Definition SparseMatrix.hpp:930
tatami bindings for TileDB matrices.
Definition DenseMatrix.hpp:20
void serialize(Function_ fun)
Definition serialize.hpp:20
std::shared_ptr< const std::vector< Index_ > > VectorPtr
typename std::conditional< oracle_, OracularSparseExtractor< Value_, Index_ >, MyopicSparseExtractor< Value_, Index_ > >::type SparseExtractor
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
void process_consecutive_indices(const Index_ *indices, Index_ length, Function_ fun)
Locking for serial access.
bool sparse_extract_index
bool sparse_extract_value
Options for sparse TileDB extraction.
Definition SparseMatrix.hpp:27
size_t maximum_cache_size
Definition SparseMatrix.hpp:37
bool require_minimum_cache
Definition SparseMatrix.hpp:44