1#ifndef TATAMI_TILEDB_SPARSE_MATRIX_HPP
2#define TATAMI_TILEDB_SPARSE_MATRIX_HPP
14#include <tiledb/tiledb>
15#include "tatami_chunked/tatami_chunked.hpp"
16#include "sanisizer/sanisizer.hpp"
53namespace SparseMatrix_internal {
// Short local aliases for helper types that live in ::tatami_tiledb::internal.
55typedef ::tatami_tiledb::internal::Components Components;
// Alias for the dimension wrapper (underlying type: VariablyTypedDimension).
56typedef ::tatami_tiledb::internal::VariablyTypedDimension Dimension;
// Alias for the buffer type used by the workspace members (underlying type: VariablyTypedVector).
57typedef ::tatami_tiledb::internal::VariablyTypedVector CacheBuffer;
// NOTE(review): the enclosing struct header is not visible in this listing; these members are
// accessed as `work.target_indices` / `work.non_target_indices` in execute_query().
// Buffer that execute_query() binds to the target dimension name.
61 CacheBuffer target_indices;
// Buffer that execute_query() binds to the non-target dimension name.
62 CacheBuffer non_target_indices;
// Submits one TileDB read query over `subarray` and returns the `.second` member of the
// result_buffer_elements() entry for the target dimension.
// Throws std::runtime_error if the status returned by submit() is not COMPLETE.
// NOTE(review): part of the parameter list (including `row` and `work`) and the statements around
// the buffer bindings are not visible in this listing; the value and non-target-index bindings are
// presumably guarded by "needs value"/"needs index" flags -- confirm against the full file.
65inline std::size_t execute_query(
66 const Components& tdb_comp,
67 tiledb::Subarray& subarray,
68 const std::string& attribute,
70 const std::string& target_dimname,
71 const std::string& non_target_dimname,
73 std::size_t general_work_offset,
74 std::size_t target_index_work_offset,
75 std::size_t work_length,
79 tiledb::Query query(tdb_comp.ctx, tdb_comp.array);
80 query.set_subarray(subarray);
// Row-major layout when `row` is true, column-major otherwise.
81 query.set_layout(row ? TILEDB_ROW_MAJOR : TILEDB_COL_MAJOR);
// The target-index buffer has its own offset; values and non-target indices share general_work_offset.
83 work.target_indices.set_data_buffer(query, target_dimname, target_index_work_offset, work_length);
85 work.values.set_data_buffer(query, attribute, general_work_offset, work_length);
88 work.non_target_indices.set_data_buffer(query, non_target_dimname, general_work_offset, work_length);
// Any status other than COMPLETE is treated as a failure.
91 if (query.submit() != tiledb::Query::Status::COMPLETE) {
92 throw std::runtime_error(
"failed to read sparse data from TileDB");
95 return query.result_buffer_elements()[target_dimname].second;
// Cache settings read by the MyopicCore constructor.
// NOTE(review): MyopicCore also reads `cache_stats.chunk_length`, whose declaration is not visible
// in this listing.
102template<
typename Index_>
103struct MyopicCacheParameters {
// Size of one slab, in elements; multiplied by max_slabs_in_cache to size the value/index buffers.
105 std::size_t slab_size_in_elements;
// Number of slabs; also passed to the slab cache's constructor.
106 Index_ max_slabs_in_cache;
// Reader core for the non-oracular ("myopic") case, backed by tatami_chunked::LruSlabCache.
// NOTE(review): the class header, some constructor parameters (e.g. needs_value / needs_index)
// and some member declarations are not visible in this listing.
109template<
typename Index_>
113 const Components& tdb_comp,
114 const std::string& attribute,
116 Index_ target_dim_extent,
117 const std::string& target_dimname,
118 const Dimension& tdb_target_dim,
119 const std::string& non_target_dimname,
120 const Dimension& tdb_non_target_dim,
121 tiledb_datatype_t tdb_type,
122 [[maybe_unused]] Index_ non_target_length,
124 const MyopicCacheParameters<Index_>& cache_stats,
// Several arguments are kept as references (see the member declarations), so the caller's objects
// must outlive this instance.
128 my_tdb_comp(tdb_comp),
129 my_attribute(attribute),
131 my_target_dim_extent(target_dim_extent),
132 my_tdb_target_dim(tdb_target_dim),
133 my_target_dimname(target_dimname),
134 my_tdb_non_target_dim(tdb_non_target_dim),
135 my_non_target_dimname(non_target_dimname),
136 my_target_chunk_length(cache_stats.chunk_length),
137 my_slab_size(cache_stats.slab_size_in_elements),
138 my_needs_value(needs_value),
139 my_needs_index(needs_index),
140 my_cache(cache_stats.max_slabs_in_cache)
// The target-index buffer is sized for a single slab; fetch_raw() compacts it from offset 0
// after each query.
143 my_work.target_indices.reset(my_tdb_target_dim.type(), my_slab_size);
// Value and non-target-index buffers span all cached slabs and are only set up when requested.
145 auto total_cache_size = sanisizer::product<std::size_t>(my_slab_size, cache_stats.max_slabs_in_cache);
146 if (my_needs_value) {
147 my_work.values.reset(tdb_type, total_cache_size);
149 if (my_needs_index) {
150 my_work.non_target_indices.reset(my_tdb_non_target_dim.type(), total_cache_size);
// The result is not used on this line; presumably this is an up-front check that
// chunk_length + 1 fits in the indptrs size type -- TODO confirm.
154 sanisizer::sum<typename decltype(Slab::indptrs)::size_type>(my_target_chunk_length, 1);
// --- Stored references and settings ---
158 const Components& my_tdb_comp;
159 const std::string& my_attribute;
162 Index_ my_target_dim_extent;
163 const Dimension& my_tdb_target_dim;
164 const std::string& my_target_dimname;
165 const Dimension& my_tdb_non_target_dim;
166 const std::string& my_non_target_dimname;
168 Index_ my_target_chunk_length;
169 std::size_t my_slab_size;
// Scratch vector of pairs filled by compact() in fetch_raw() and read as (index, count).
173 std::vector<std::pair<Index_, Index_> > my_counts;
// Per-slab pointers: after fetch_raw() populates a slab, entry k is the running total of counts
// for the first k target indices of the chunk.
177 std::vector<std::size_t> indptrs;
// Offset assigned to the next slab; advanced by my_slab_size in fetch_raw().
179 std::size_t my_offset = 0;
180 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
// Looks up the slab containing target index `i` through the cache and returns a pair of
// (workspace offset of the element's data, number of entries for that element).
// `configure` is called as configure(subarray, rowdex) before the query is executed.
// NOTE(review): several lines of this function (including where `rowdex` is defined and most of
// the execute_query() argument list) are not visible in this listing.
183 template<
class Configure_>
184 std::pair<std::size_t, std::size_t> fetch_raw(Index_ i, Configure_ configure) {
// Split the target index into a chunk id and a position within that chunk.
185 Index_ chunk = i / my_target_chunk_length;
186 Index_ index = i % my_target_chunk_length;
188 const auto& info = my_cache.find(
// Each slab is given the next my_slab_size-wide region of the workspace.
192 output.offset = my_offset;
193 my_offset += my_slab_size;
// Callback that fills the slab for chunk `id`.
196 [&](Index_ id, Slab& contents) ->
void {
197 Index_ chunk_start =
id * my_target_chunk_length;
// The last chunk may be shorter than my_target_chunk_length.
198 Index_ chunk_length = std::min(my_target_dim_extent - chunk_start, my_target_chunk_length);
200 std::size_t num_nonzero = 0;
202 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
// The target range goes on dimension `1 - rowdex`; `configure` receives `rowdex` itself.
204 my_tdb_target_dim.add_range(subarray, 1 - rowdex, chunk_start, chunk_length);
205 configure(subarray, rowdex);
206 num_nonzero = execute_query(
212 my_non_target_dimname,
222 auto& indptrs = contents.indptrs;
// One more entry than there are target indices in the chunk.
224 indptrs.resize(
static_cast<decltype(indptrs.size())
>(chunk_length) + 1);
// Presumably compact() summarises the returned target indices into (index, count) pairs
// in my_counts -- TODO confirm against its definition.
227 my_work.target_indices.compact(0, num_nonzero, my_tdb_target_dim, my_counts);
228 for (
const auto& cnts : my_counts) {
229 indptrs[cnts.first - chunk_start + 1] = cnts.second;
// Convert the per-index counts into a running sum.
231 for (Index_ i = 1; i <= chunk_length; ++i) {
232 indptrs[i] += indptrs[i - 1];
238 auto start = info.indptrs[index];
239 return std::make_pair(info.offset + start, info.indptrs[index + 1] - start);
// Fetches target index `i`, restricted to one range on the non-target dimension given by
// (block_start, block_length). Returns the same (offset, count) pair type as fetch_raw().
// NOTE(review): the call that receives this lambda is not visible here; presumably fetch_raw().
243 std::pair<std::size_t, std::size_t> fetch_block(Index_ i, Index_ block_start, Index_ block_length) {
246 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
247 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, block_length);
// Fetches target index `i`, restricted to the given non-target `indices`.
// The inner lambda adds one non-target range per (s, l) pair it is called with.
// NOTE(review): the code that invokes the inner lambda is not visible here; presumably it walks
// `indices` as runs of consecutive values (start, length) -- confirm.
252 std::pair<std::size_t, std::size_t> fetch_indices(Index_ i,
const std::vector<Index_>& indices) {
255 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
259 [&](Index_ s, Index_ l) ->
void {
260 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Read-only accessors used by the extractor classes.
// NOTE(review): the body of get_workspace() is not visible in this listing.
268 const Workspace& get_workspace()
const {
// Returns the stored "needs value" flag.
272 bool get_needs_value()
const {
273 return my_needs_value;
// Returns the stored "needs index" flag.
276 bool get_needs_index()
const {
277 return my_needs_index;
// Returns the stored reference to the non-target dimension.
280 const Dimension& get_tdb_non_target_dim()
const {
281 return my_tdb_non_target_dim;
// Cache settings read by the OracularCore constructor.
// NOTE(review): OracularCore also reads `cache_stats.chunk_length`, whose declaration is not
// visible in this listing.
293template<
typename Index_>
294struct OracularCacheParameters {
// Cache capacity in elements; used to size all three workspace buffers and the slab cache.
296 std::size_t max_cache_size_in_elements;
// Reader core for the oracle-driven case, backed by tatami_chunked::OracularVariableSlabCache.
// NOTE(review): the class header, some constructor parameters (e.g. the oracle, needs_value,
// needs_index) and some member declarations are not visible in this listing.
299template<
typename Index_>
303 const Components& tdb_comp,
304 const std::string& attribute,
306 Index_ target_dim_extent,
307 const std::string& target_dimname,
308 const Dimension& tdb_target_dim,
309 const std::string& non_target_dimname,
310 const Dimension& tdb_non_target_dim,
311 tiledb_datatype_t tdb_type,
312 Index_ non_target_length,
314 const OracularCacheParameters<Index_>& cache_stats,
// Several arguments are kept as references (see the member declarations), so the caller's objects
// must outlive this instance.
317 my_tdb_comp(tdb_comp),
318 my_attribute(attribute),
320 my_target_dim_extent(target_dim_extent),
321 my_tdb_target_dim(tdb_target_dim),
322 my_target_dimname(target_dimname),
323 my_tdb_non_target_dim(tdb_non_target_dim),
324 my_non_target_dimname(non_target_dimname),
325 my_target_chunk_length(cache_stats.chunk_length),
// Largest possible slab: non_target_length times the chunk length.
326 my_max_slab_size(sanisizer::product<std::size_t>(non_target_length, my_target_chunk_length)),
327 my_needs_value(needs_value),
328 my_needs_index(needs_index),
329 my_cache(std::move(oracle), cache_stats.max_cache_size_in_elements)
// All buffers are sized to the full cache capacity; values and non-target indices only if requested.
331 my_work.target_indices.reset(my_tdb_target_dim.type(), cache_stats.max_cache_size_in_elements);
332 if (my_needs_value) {
333 my_work.values.reset(tdb_type, cache_stats.max_cache_size_in_elements);
335 if (my_needs_index) {
336 my_work.non_target_indices.reset(my_tdb_non_target_dim.type(), cache_stats.max_cache_size_in_elements);
// The result is not used on this line; presumably this is an up-front check that
// chunk_length + 1 fits in the indptrs size type -- TODO confirm.
340 sanisizer::sum<typename decltype(Slab::indptrs)::size_type>(my_target_chunk_length, 1);
// --- Stored references and settings ---
344 const Components& my_tdb_comp;
345 const std::string& my_attribute;
348 Index_ my_target_dim_extent;
349 const Dimension& my_tdb_target_dim;
350 const std::string& my_target_dimname;
351 const Dimension& my_tdb_non_target_dim;
352 const std::string& my_non_target_dimname;
354 Index_ my_target_chunk_length;
355 std::size_t my_max_slab_size;
// Scratch vector of pairs filled by compact() in fetch_raw() and read as (index, count).
359 std::vector<std::pair<Index_, Index_> > my_counts;
// Per-slab pointers; the last entry is used in fetch_raw() as the slab's actual size.
363 std::vector<std::size_t> indptrs;
365 tatami_chunked::OracularVariableSlabCache<Index_, Index_, Slab, std::size_t> my_cache;
// Sorts `indices` in ascending order of field(element).
// std::is_sorted is checked first so that std::sort is only called on unsorted input.
368 template<
class Function_>
369 static void sort_by_field(std::vector<std::pair<Index_, std::size_t> >& indices, Function_ field) {
370 auto comp = [&field](
const std::pair<Index_, std::size_t>& l,
const std::pair<Index_, std::size_t>& r) ->
bool {
371 return field(l) < field(r);
373 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
374 std::sort(indices.begin(), indices.end(), comp);
// Asks the oracular cache for the next slab and returns a pair of (workspace offset of the
// element's data, number of entries for that element). The index argument is unused here; the
// cache supplies the next target index itself. `configure` is called as configure(subarray, rowdex).
// NOTE(review): several lines of this function (including where `rowdex` is defined, most of the
// execute_query() argument list and the advance of `cIt`) are not visible in this listing.
378 template<
class Configure_>
379 std::pair<std::size_t, std::size_t> fetch_raw([[maybe_unused]] Index_ i, Configure_ configure) {
380 auto info = my_cache.next(
// Map a target index to (chunk id, position within the chunk).
381 [&](Index_ current) -> std::pair<Index_, Index_> {
382 return std::pair<Index_, Index_>(current / my_target_chunk_length, current % my_target_chunk_length);
// Size reported for a slab before it is loaded: the maximum possible slab size.
384 [&](Index_) -> std::size_t {
385 return my_max_slab_size;
// Size reported for an already-populated slab: the last indptrs entry.
387 [&](Index_,
const Slab& slab) -> std::size_t {
388 return slab.indptrs.back();
// Callback that repacks the slabs being kept and loads the slabs being added.
393 [&](std::vector<std::pair<Index_, std::size_t> >& to_populate, std::vector<std::pair<Index_, std::size_t> >& to_reuse, std::vector<Slab>& all_slabs) ->
void {
// Visit reused slabs in order of their current offset and shift each one down so that they
// sit back-to-back from the start of the workspace.
396 sort_by_field(to_reuse, [&](
const std::pair<Index_, std::size_t>& x) -> std::size_t {
return all_slabs[x.second].offset; });
397 std::size_t running_offset = 0;
398 for (
auto& x : to_reuse) {
399 auto& reused_slab = all_slabs[x.second];
400 auto& cur_offset = reused_slab.offset;
401 auto num_nonzero = reused_slab.indptrs.back();
402 if (cur_offset != running_offset) {
403 if (my_needs_value) {
404 my_work.values.shift(cur_offset, num_nonzero, running_offset);
406 if (my_needs_index) {
407 my_work.non_target_indices.shift(cur_offset, num_nonzero, running_offset);
409 cur_offset = running_offset;
411 running_offset += num_nonzero;
// Sort the slabs to load by chunk id so that neighbouring chunks can share one range.
419 sort_by_field(to_populate, [](
const std::pair<Index_, std::size_t>& x) -> Index_ {
return x.first; });
421 std::size_t num_nonzero = 0;
423 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
425 configure(subarray, rowdex);
// Start the first run at the first chunk; the last chunk of the dimension may be shorter.
427 Index_ run_chunk_id = to_populate.front().first;
428 Index_ run_chunk_start = run_chunk_id * my_target_chunk_length;
429 Index_ run_length = std::min(my_target_dim_extent - run_chunk_start, my_target_chunk_length);
431 int dimdex = 1 - rowdex;
432 for (
decltype(to_populate.size()) ci = 1, cend = to_populate.size(); ci < cend; ++ci) {
433 Index_ current_chunk_id = to_populate[ci].first;
434 Index_ current_chunk_start = current_chunk_id * my_target_chunk_length;
// A gap closes the current run as one range and starts a new run.
// NOTE(review): in the visible lines run_chunk_id is only reassigned when a new run starts;
// if it is not also advanced per chunk, runs of three or more adjacent chunks get split
// into several ranges -- confirm against the full source.
436 if (current_chunk_id - run_chunk_id > 1) {
437 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
438 run_chunk_id = current_chunk_id;
439 run_chunk_start = current_chunk_start;
443 run_length += std::min(my_target_dim_extent - current_chunk_start, my_target_chunk_length);
// Add the final run.
446 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
447 num_nonzero = execute_query(
453 my_non_target_dimname,
// One maximum-size slab per chunk being loaded.
457 sanisizer::product_unsafe<std::size_t>(to_populate.size(), my_max_slab_size),
// Presumably compact() summarises the returned target indices into (index, count) pairs
// in my_counts -- TODO confirm against its definition.
463 my_work.target_indices.compact(running_offset, num_nonzero, my_tdb_target_dim, my_counts);
// Walk the counts once, assigning them to the slabs in chunk order.
465 auto cIt = my_counts.begin(), cEnd = my_counts.end();
466 for (
auto& si : to_populate) {
467 auto& populate_slab = all_slabs[si.second];
468 populate_slab.offset = running_offset;
470 Index_ chunk_start = si.first * my_target_chunk_length;
471 Index_ chunk_length = std::min(my_target_dim_extent - chunk_start, my_target_chunk_length);
472 Index_ chunk_end = chunk_start + chunk_length;
// clear() followed by resize() leaves every entry value-initialised.
474 auto& slab_indptrs = populate_slab.indptrs;
475 slab_indptrs.clear();
476 slab_indptrs.resize(
static_cast<decltype(slab_indptrs.size())
>(chunk_length) + 1);
// Take the counts whose index lies before the end of this chunk.
478 while (cIt != cEnd && cIt->first < chunk_end) {
479 slab_indptrs[cIt->first - chunk_start + 1] = cIt->second;
// Convert the per-index counts into a running sum.
483 for (Index_ i = 1; i <= chunk_length; ++i) {
484 slab_indptrs[i] += slab_indptrs[i - 1];
486 running_offset += slab_indptrs.back();
491 const auto& indptrs = info.first->indptrs;
492 auto start = indptrs[info.second];
493 return std::make_pair(info.first->offset + start, indptrs[info.second + 1] - start);
// Fetches the next element, restricted to one range on the non-target dimension given by
// (block_start, block_length). Returns the same (offset, count) pair type as fetch_raw().
// NOTE(review): the call that receives this lambda is not visible here; presumably fetch_raw().
497 std::pair<std::size_t, std::size_t> fetch_block(Index_ i, Index_ block_start, Index_ block_length) {
500 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
501 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, block_length);
// Fetches the next element, restricted to the given non-target `indices`.
// The inner lambda adds one non-target range per (s, l) pair it is called with.
// NOTE(review): the code that invokes the inner lambda is not visible here; presumably it walks
// `indices` as runs of consecutive values (start, length) -- confirm.
506 std::pair<std::size_t, std::size_t> fetch_indices(Index_ i,
const std::vector<Index_>& indices) {
509 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
513 [&](Index_ s, Index_ l) ->
void {
514 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Read-only accessors used by the extractor classes.
// NOTE(review): the body of get_workspace() is not visible in this listing.
522 const Workspace& get_workspace()
const {
// Returns the stored "needs value" flag.
526 bool get_needs_value()
const {
527 return my_needs_value;
// Returns the stored "needs index" flag.
530 bool get_needs_index()
const {
531 return my_needs_index;
// Returns the stored reference to the non-target dimension.
534 const Dimension& get_tdb_non_target_dim()
const {
535 return my_tdb_non_target_dim;
// Selects the core type at compile time: OracularCore when oracle_ is true, MyopicCore otherwise.
539template<
bool oracle_,
typename Index_>
540using SparseCore =
typename std::conditional<oracle_, OracularCore<Index_>, MyopicCore<Index_> >::type;
// Selects the cache-parameter struct matching SparseCore: OracularCacheParameters when oracle_
// is true, MyopicCacheParameters otherwise.
542template<
bool oracle_,
typename Index_>
543using CacheParameters =
typename std::conditional<oracle_, OracularCacheParameters<Index_>, MyopicCacheParameters<Index_> >::type;
// Builds the sparse output for one element from the workspace region
// [work_start, work_start + work_length): sets the entry count, copies values into `vbuffer` and
// non-target indices into `ibuffer`, and points the output at those buffers.
// NOTE(review): the function header, the buffer/flag parameters and the conditions around the two
// copies are not visible in this listing; callers pass needs-value / needs-index flags, so the
// copies are presumably conditional -- confirm.
549template<
typename Value_,
typename Index_>
551 const Workspace& work,
552 std::size_t work_start,
553 std::size_t work_length,
554 const Dimension& non_target_dim,
561 output.
number = work_length;
563 work.values.copy(work_start, work_length, vbuffer);
564 output.
value = vbuffer;
// The dimension is passed along with the index copy.
567 work.non_target_indices.copy(work_start, work_length, non_target_dim, ibuffer);
568 output.
index = ibuffer;
// Sparse extractor over the whole non-target dimension: each fetch requests the block
// (0, my_non_target_dim) from the core and wraps the result with fill_sparse_range().
// NOTE(review): the class header, the remaining constructor parameters and the fetch() signature
// are not visible here; the class name is taken from the populate<..., SparseFull> call sites.
573template<
bool oracle_,
typename Value_,
typename Index_>
577 const Components& tdb_comp,
578 const std::string& attribute,
580 Index_ target_dim_extent,
581 const std::string& target_dimname,
582 const Dimension& tdb_target_dim,
583 const std::string& non_target_dimname,
584 const Dimension& tdb_non_target_dim,
585 tiledb_datatype_t tdb_type,
587 Index_ non_target_dim,
588 const CacheParameters<oracle_, Index_>& cache_parameters,
607 my_non_target_dim(non_target_dim)
611 auto info = my_core.fetch_block(i, 0, my_non_target_dim);
612 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
616 SparseCore<oracle_, Index_> my_core;
// Length of the block requested on every fetch.
617 Index_ my_non_target_dim;
// Sparse extractor over one block of the non-target dimension: each fetch requests
// (my_block_start, my_block_length) from the core and wraps the result with fill_sparse_range().
// NOTE(review): the class header, the remaining constructor parameters and the fetch() signature
// are not visible here; the class name is taken from the populate<..., SparseBlock> call sites.
620template<
bool oracle_,
typename Value_,
typename Index_>
624 const Components& tdb_comp,
625 const std::string& attribute,
627 Index_ target_dim_extent,
628 const std::string& target_dimname,
629 const Dimension& tdb_target_dim,
630 const std::string& non_target_dimname,
631 const Dimension& tdb_non_target_dim,
632 tiledb_datatype_t tdb_type,
636 const CacheParameters<oracle_, Index_>& cache_parameters,
655 my_block_start(block_start),
656 my_block_length(block_length)
660 auto info = my_core.fetch_block(i, my_block_start, my_block_length);
661 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
665 SparseCore<oracle_, Index_> my_core;
666 Index_ my_block_start, my_block_length;
// Sparse extractor over a list of non-target indices: each fetch passes *my_indices_ptr to the
// core's fetch_indices() and wraps the result with fill_sparse_range().
// NOTE(review): the class header, the remaining constructor parameters, the fetch() signature and
// the my_indices_ptr declaration are not visible here; the class name is taken from the
// populate<..., SparseIndex> call sites.
669template<
bool oracle_,
typename Value_,
typename Index_>
673 const Components& tdb_comp,
674 const std::string& attribute,
676 Index_ target_dim_extent,
677 const std::string& target_dimname,
678 const Dimension& tdb_target_dim,
679 const std::string& non_target_dimname,
680 const Dimension& tdb_non_target_dim,
681 tiledb_datatype_t tdb_type,
684 const CacheParameters<oracle_, Index_>& cache_parameters,
703 my_indices_ptr(std::move(indices_ptr))
707 auto info = my_core.fetch_indices(i, *my_indices_ptr);
708 return fill_sparse_range(my_core.get_workspace(), info.first, info.second, my_core.get_tdb_non_target_dim(), vbuffer, ibuffer, my_core.get_needs_value(), my_core.get_needs_index());
712 SparseCore<oracle_, Index_> my_core;
// Dense extractor over the whole non-target dimension. Each fetch reads the sparse entries for
// the element, zero-fills `buffer` and scatters the values to their index positions.
// NOTE(review): the class header and the arguments forwarded to the core are only partly visible;
// the class name is taken from the populate<..., DenseFull> call sites. The return statement of
// fetch() is not visible either.
720template<
bool oracle_,
typename Value_,
typename Index_>
724 const Components& tdb_comp,
725 const std::string& attribute,
727 Index_ target_dim_extent,
728 const std::string& target_dimname,
729 const Dimension& tdb_target_dim,
730 const std::string& non_target_dimname,
731 const Dimension& tdb_non_target_dim,
732 tiledb_datatype_t tdb_type,
734 Index_ non_target_dim_extent,
735 const CacheParameters<oracle_, Index_>& cache_parameters,
// Declared [[maybe_unused]]; presumably the core is always asked for both values and indices,
// since the scatter needs both -- TODO confirm against the hidden core arguments.
736 [[maybe_unused]]
bool needs_value,
737 [[maybe_unused]]
bool needs_index) :
748 non_target_dim_extent,
754 my_non_target_dim_extent(non_target_dim_extent),
// Holding buffers are sized to the full non-target extent.
755 my_holding_value(my_non_target_dim_extent),
756 my_holding_index(my_non_target_dim_extent)
759 const Value_* fetch(Index_ i, Value_* buffer) {
760 auto info = my_core.fetch_block(i, 0, my_non_target_dim_extent);
761 const auto& work = my_core.get_workspace();
762 work.values.copy(info.first, info.second, my_holding_value.data());
763 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
// Zero the whole output, then overwrite the positions that have stored entries.
764 std::fill_n(buffer, my_non_target_dim_extent, 0);
// The loop counter shadows the parameter `i`, which is no longer needed at this point.
765 for (
decltype(info.second) i = 0; i < info.second; ++i) {
766 buffer[my_holding_index[i]] = my_holding_value[i];
772 SparseCore<oracle_, Index_> my_core;
773 Index_ my_non_target_dim_extent;
774 std::vector<Value_> my_holding_value;
775 std::vector<Index_> my_holding_index;
// Dense extractor over one block of the non-target dimension. Each fetch reads the sparse entries
// in the block, zero-fills `buffer` and scatters values to (index - my_block_start).
// NOTE(review): the class header and the arguments forwarded to the core are not visible here;
// the class name is taken from the populate<..., DenseBlock> call sites. The return statement of
// fetch() is not visible either.
778template<
bool oracle_,
typename Value_,
typename Index_>
782 const Components& tdb_comp,
783 const std::string& attribute,
785 Index_ target_dim_extent,
786 const std::string& target_dimname,
787 const Dimension& tdb_target_dim,
788 const std::string& non_target_dimname,
789 const Dimension& tdb_non_target_dim,
790 tiledb_datatype_t tdb_type,
794 const CacheParameters<oracle_, Index_>& cache_parameters,
// Declared [[maybe_unused]]; presumably the core is always asked for both values and indices,
// since the scatter needs both -- TODO confirm against the hidden core arguments.
795 [[maybe_unused]]
bool needs_value,
796 [[maybe_unused]]
bool needs_index) :
813 my_block_start(block_start),
814 my_block_length(block_length),
// Holding buffers are sized to the block length.
815 my_holding_value(block_length),
816 my_holding_index(block_length)
819 const Value_* fetch(Index_ i, Value_* buffer) {
820 auto info = my_core.fetch_block(i, my_block_start, my_block_length);
821 const auto& work = my_core.get_workspace();
822 work.values.copy(info.first, info.second, my_holding_value.data());
823 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
// Zero the block-sized output, then overwrite the positions that have stored entries.
824 std::fill_n(buffer, my_block_length, 0);
// The loop counter shadows the parameter `i`, which is no longer needed at this point.
825 for (
decltype(info.second) i = 0; i < info.second; ++i) {
// Indices are shifted by the block start to get a position within `buffer`.
826 buffer[my_holding_index[i] - my_block_start] = my_holding_value[i];
832 SparseCore<oracle_, Index_> my_core;
833 Index_ my_block_start, my_block_length;
834 std::vector<Value_> my_holding_value;
835 std::vector<Index_> my_holding_index;
// Dense extractor over a list of non-target indices. A remapping table built in the constructor
// converts (index - first index) into the position of that index within the list; each fetch
// zero-fills `buffer` and scatters the values through that table.
// NOTE(review): the class header, the arguments forwarded to the core and the my_indices_ptr
// declaration are not visible here; the class name is taken from the populate<..., DenseIndex>
// call sites. The return statement of fetch() is not visible either.
838template<
bool oracle_,
typename Value_,
typename Index_>
842 const Components& tdb_comp,
843 const std::string& attribute,
845 Index_ target_dim_extent,
846 const std::string& target_dimname,
847 const Dimension& tdb_target_dim,
848 const std::string& non_target_dimname,
849 const Dimension& tdb_non_target_dim,
850 tiledb_datatype_t tdb_type,
853 const CacheParameters<oracle_, Index_>& cache_parameters,
// Declared [[maybe_unused]]; presumably the core is always asked for both values and indices,
// since the scatter needs both -- TODO confirm against the hidden core arguments.
854 [[maybe_unused]]
bool needs_value,
855 [[maybe_unused]]
bool needs_index) :
872 my_indices_ptr(std::move(indices_ptr)),
// Holding buffers have one slot per requested index.
873 my_holding_value(my_indices_ptr->size()),
874 my_holding_index(my_indices_ptr->size())
876 const auto& indices = *my_indices_ptr;
877 if (!indices.empty()) {
// The table spans front()..back() inclusive; this assumes `indices` is sorted ascending
// -- TODO confirm with callers.
878 auto idx_start = indices.front();
879 tatami::resize_container_to_Index_size(my_remapping, indices.back() - idx_start + 1);
880 for (decltype(indices.size()) j = 0, end = indices.size(); j < end; ++j) {
881 my_remapping[indices[j] - idx_start] = j;
886 const Value_* fetch(Index_ i, Value_* buffer) {
887 const auto& indices = *my_indices_ptr;
// With no indices there is nothing to query or fill.
889 if (!indices.empty()) {
890 auto info = my_core.fetch_indices(i, indices);
891 const auto& work = my_core.get_workspace();
892 work.values.copy(info.first, info.second, my_holding_value.data());
893 work.non_target_indices.copy(info.first, info.second, my_core.get_tdb_non_target_dim(), my_holding_index.data());
894 auto idx_start = indices.front();
// Zero one output slot per requested index, then overwrite the ones with stored entries.
895 std::fill_n(buffer, indices.size(), 0);
// The loop counter shadows the parameter `i`, which is no longer needed at this point.
896 for (
decltype(info.second) i = 0; i < info.second; ++i) {
897 buffer[my_remapping[my_holding_index[i] - idx_start]] = my_holding_value[i];
905 SparseCore<oracle_, Index_> my_core;
// Maps (index - indices.front()) to the position of that index in the list.
907 std::vector<Index_> my_remapping;
908 std::vector<Value_> my_holding_value;
909 std::vector<Index_> my_holding_index;
// Constructor bodies of the SparseMatrix class; both delegate to initialize().
// NOTE(review): the class header and the constructor signatures are not visible in this listing.
931template<
typename Value_,
typename Index_>
// Variant that supplies a tiledb::Context.
941 initialize(uri, std::move(ctx), options);
// Variant without a context: `false` stands in for the context argument.
950 initialize(uri,
false, options);
// Opens the array at `uri`, validates its schema and caches the dimension metadata.
// Throws std::runtime_error if the array is not sparse, lacks the requested attribute, or does
// not have exactly two dimensions.
// `ctx` is either a tiledb::Context or a placeholder of another type (the constructors pass `false`).
// NOTE(review): some lines (e.g. locking/serialisation and the deleter body) are not visible here.
960 template<
class PossibleContext_>
961 void initialize(
const std::string& uri, PossibleContext_ ctx,
const SparseMatrixOptions& options) {
// Build Components with the supplied context when there is one, otherwise from the URI alone.
967 if constexpr(std::is_same<PossibleContext_, tiledb::Context>::value) {
968 return new SparseMatrix_internal::Components(std::move(ctx), uri);
970 return new SparseMatrix_internal::Components(uri);
// Custom deleter for the Components pointer (body not visible in this listing).
973 [](SparseMatrix_internal::Components* ptr) {
981 auto schema = my_tdb_comp->array.schema();
982 if (schema.array_type() != TILEDB_SPARSE) {
983 throw std::runtime_error(
"TileDB array should be sparse");
// Stored for later use by populate() when choosing the chunk length.
985 my_cell_order = schema.cell_order();
990 if (!schema.has_attribute(my_attribute)) {
991 throw std::runtime_error(
"no attribute '" + my_attribute +
"' is present in the TileDB array");
993 auto attr = schema.attribute(my_attribute);
994 my_tdb_type = attr.type();
996 tiledb::Domain domain = schema.domain();
997 if (domain.ndim() != 2) {
998 throw std::runtime_error(
"TileDB array should have exactly two dimensions");
// Dimension 0 supplies the row count (see nrow_internal()).
1001 tiledb::Dimension first_dim = domain.dimension(0);
1002 my_first_dimname = first_dim.name();
1003 my_tdb_first_dim.reset(first_dim);
1004 Index_ first_extent = my_tdb_first_dim.extent<Index_>();
1005 Index_ first_tile = my_tdb_first_dim.tile<Index_>();
1006 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(first_extent, first_tile);
// Dimension 1 supplies the column count (see ncol_internal()).
1008 tiledb::Dimension second_dim = domain.dimension(1);
1009 my_second_dimname = second_dim.name();
1010 my_tdb_second_dim.reset(second_dim);
1011 Index_ second_extent = my_tdb_second_dim.extent<Index_>();
1012 Index_ second_tile = my_tdb_second_dim.tile<Index_>();
1013 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(second_extent, second_tile);
// Ceiling division: number of tiles along each dimension. The first dimension is preferred when
// the tile count along the second dimension is no larger than the count along the first.
1016 auto tiles_per_firstdim = (second_extent / second_tile) + (second_extent % second_tile > 0);
1017 auto tiles_per_seconddim = (first_extent / first_tile) + (first_extent % first_tile > 0);
1018 my_prefer_firstdim = tiles_per_firstdim <= tiles_per_seconddim;
// --- State set up by the constructors and initialize() ---
// TileDB handles (context and array), held through a shared pointer.
1023 std::shared_ptr<SparseMatrix_internal::Components> my_tdb_comp;
// Cell order and attribute type read from the array schema.
1024 tiledb_layout_t my_cell_order;
1025 tiledb_datatype_t my_tdb_type;
// Name of the attribute to read, and the cache settings used by populate().
1027 std::string my_attribute;
1028 std::size_t my_cache_size_in_bytes;
1029 bool my_require_minimum_cache;
// Names, wrappers and chunking statistics for the two array dimensions.
1031 std::string my_first_dimname, my_second_dimname;
1032 SparseMatrix_internal::Dimension my_tdb_first_dim, my_tdb_second_dim;
1033 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
// Result of the tile-count comparison in initialize(); returned by prefer_rows().
1035 bool my_prefer_firstdim;
// Number of rows: the extent of the first array dimension.
1038 Index_ nrow_internal()
const {
1039 return my_firstdim_stats.dimension_extent;
// Number of columns: the extent of the second array dimension.
1042 Index_ ncol_internal()
const {
1043 return my_seconddim_stats.dimension_extent;
// Forwards to nrow_internal().
1047 Index_ nrow()
const {
1048 return nrow_internal();
// Forwards to ncol_internal().
1051 Index_ ncol()
const {
1052 return ncol_internal();
// NOTE(review): the bodies of is_sparse() and is_sparse_proportion() are not visible in this listing.
1055 bool is_sparse()
const {
1059 double is_sparse_proportion()
const {
// Row access is preferred when initialize() decided to prefer the first dimension.
1063 bool prefer_rows()
const {
1064 return my_prefer_firstdim;
// Same decision as prefer_rows(), converted to double (0 or 1).
1067 double prefer_rows_proportion()
const {
1068 return static_cast<double>(my_prefer_firstdim);
// The argument is ignored; the result is true whenever a non-zero cache size is configured.
1071 bool uses_oracle(
bool)
const {
1074 return my_cache_size_in_bytes > 0;
// Shared factory behind all dense()/sparse() overloads: picks the target and non-target
// dimension, derives cache parameters for the oracular or myopic case, and constructs
// Extractor_<oracle_, Value_, Index_>.
// NOTE(review): parts of the template header, the parameter list, the conditions around the
// nonzero_size additions and most extractor constructor arguments are not visible in this listing.
1080 template<
typename,
typename>
class Interface_,
1081 template<
bool,
typename,
typename>
class Extractor_,
1084 std::unique_ptr<Interface_<Value_, Index_> > populate(
1086 Index_ non_target_length,
// For row access the first dimension is the target; otherwise the roles are swapped.
1091 const auto& target_dim_stats = (row ? my_firstdim_stats : my_seconddim_stats);
1092 const auto& target_dimname = (row ? my_first_dimname : my_second_dimname);
1093 const auto& non_target_dimname = (row ? my_second_dimname : my_first_dimname);
1094 const auto& tdb_target_dim = (row ? my_tdb_first_dim : my_tdb_second_dim);
1095 const auto& tdb_non_target_dim = (row ? my_tdb_second_dim : my_tdb_first_dim);
// Size accumulated from the types stored per entry (value type and non-target index type);
// the guards on these additions are not visible here.
1097 std::size_t nonzero_size = 0;
1099 nonzero_size += ::tatami_tiledb::internal::determine_type_size(my_tdb_type);
1102 nonzero_size += ::tatami_tiledb::internal::determine_type_size(tdb_non_target_dim.type());
1105 if constexpr(oracle_) {
// The oracular case also counts the target index type (OracularCore sizes its target-index
// buffer to the full cache).
1109 nonzero_size += ::tatami_tiledb::internal::determine_type_size(tdb_target_dim.type());
1111 SparseMatrix_internal::OracularCacheParameters<Index_> cache_params;
1112 cache_params.max_cache_size_in_elements = my_cache_size_in_bytes / nonzero_size;
// Chunk length 1 when the access direction matches the cell order, otherwise the tile-based
// chunk length.
1125 cache_params.chunk_length = (row == (my_cell_order == TILEDB_ROW_MAJOR) ? 1 : target_dim_stats.chunk_length);
// Either grow the cache to hold at least one maximum-size slab, or, if it is too small and
// growing is not allowed, fall back to single-element chunks with room for one element.
1133 auto max_slab_size = sanisizer::product<std::size_t>(non_target_length, cache_params.chunk_length);
1134 if (my_require_minimum_cache) {
1135 cache_params.max_cache_size_in_elements = std::max(cache_params.max_cache_size_in_elements, max_slab_size);
1136 }
else if (cache_params.max_cache_size_in_elements < max_slab_size) {
1137 cache_params.max_cache_size_in_elements = non_target_length;
1138 cache_params.chunk_length = 1;
1141 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
1145 target_dim_stats.dimension_extent,
1152 std::forward<Args_>(args)...,
// Myopic case: tatami_chunked::SlabCacheStats works out the slab size and slab count.
1159 tatami_chunked::SlabCacheStats<Index_> raw_params(
1160 target_dim_stats.chunk_length,
1162 target_dim_stats.num_chunks,
1163 my_cache_size_in_bytes,
1165 my_require_minimum_cache
1173 SparseMatrix_internal::MyopicCacheParameters<Index_> cache_params;
1174 if (raw_params.max_slabs_in_cache > 0) {
1175 cache_params.chunk_length = target_dim_stats.chunk_length;
1176 cache_params.slab_size_in_elements = raw_params.slab_size_in_elements;
1177 cache_params.max_slabs_in_cache = raw_params.max_slabs_in_cache;
// Presumably the else-branch (its opening line is not visible): when no slab fits, use a
// single slab holding one target element.
1179 cache_params.chunk_length = 1;
1180 cache_params.slab_size_in_elements = non_target_length;
1181 cache_params.max_slabs_in_cache = 1;
1184 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
1188 target_dim_stats.dimension_extent,
1195 std::forward<Args_>(args)...,
// Myopic dense extractors. Each overload forwards to populate<false, ...>, passing `false` in the
// oracle slot and options transformed by set_extract_all(opt).
// Full-extent overload: the non-target length is the full extent of the other dimension.
1215 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
1216 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1217 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseFull>(row, full_non_target,
false, set_extract_all(opt), full_non_target);
// Block overload: the non-target length is the block length.
1220 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1221 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseBlock>(row, block_length,
false, set_extract_all(opt), block_start, block_length);
// Indexed overload (signature not visible here): the non-target length is the number of indices,
// read before indices_ptr is moved from.
1225 auto nidx = indices_ptr->size();
1226 return populate<false, tatami::MyopicDenseExtractor, SparseMatrix_internal::DenseIndex>(row, nidx,
false, set_extract_all(opt), std::move(indices_ptr));
// Myopic sparse extractors. Each overload forwards to populate<false, ...>, passing `false` in the
// oracle slot and the caller's options unchanged.
// Full-extent overload: the non-target length is the full extent of the other dimension.
1233 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
1234 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1235 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseFull>(row, full_non_target,
false, opt, full_non_target);
// Block overload: the non-target length is the block length.
1238 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
1239 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseBlock>(row, block_length,
false, opt, block_start, block_length);
// Indexed overload (signature not visible here): the non-target length is the number of indices,
// read before indices_ptr is moved from.
1243 auto nidx = indices_ptr->size();
1244 return populate<false, tatami::MyopicSparseExtractor, SparseMatrix_internal::SparseIndex>(row, nidx,
false, opt, std::move(indices_ptr));
// Oracular dense extractors. Each overload forwards to populate<true, ...>, moving the oracle in
// and passing options transformed by set_extract_all(opt).
// NOTE(review): most of the parameter lists are not visible in this listing.
// Full-extent overload.
1251 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1256 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1257 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseFull>(row, full_non_target, std::move(oracle), set_extract_all(opt), full_non_target);
// Block overload.
1260 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1264 Index_ block_length,
1267 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseBlock>(row, block_length, std::move(oracle), set_extract_all(opt), block_start, block_length);
// Indexed overload: the index count is read before indices_ptr is moved from.
1270 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
1276 auto nidx = indices_ptr->size();
1277 return populate<true, tatami::OracularDenseExtractor, SparseMatrix_internal::DenseIndex>(row, nidx, std::move(oracle), set_extract_all(opt), std::move(indices_ptr));
// Oracular sparse extractors. Each overload forwards to populate<true, ...>, moving the oracle in
// and passing the caller's options unchanged.
// NOTE(review): most of the parameter lists are not visible in this listing.
// Full-extent overload.
1284 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1289 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
1290 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseFull>(row, full_non_target, std::move(oracle), opt, full_non_target);
// Block overload.
1293 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1297 Index_ block_length,
1300 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseBlock>(row, block_length, std::move(oracle), opt, block_start, block_length);
// Indexed overload: the index count is read before indices_ptr is moved from.
1303 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
1309 auto nidx = indices_ptr->size();
1310 return populate<true, tatami::OracularSparseExtractor, SparseMatrix_internal::SparseIndex>(row, nidx, std::move(oracle), opt, std::move(indices_ptr));
TileDB-backed sparse matrix.
Definition SparseMatrix.hpp:932
SparseMatrix(const std::string &uri, std::string attribute, const SparseMatrixOptions &options)
Definition SparseMatrix.hpp:949
SparseMatrix(const std::string &uri, std::string attribute)
Definition SparseMatrix.hpp:957
SparseMatrix(const std::string &uri, std::string attribute, tiledb::Context ctx, const SparseMatrixOptions &options)
Definition SparseMatrix.hpp:940
tatami bindings for TileDB matrices.
Definition DenseMatrix.hpp:22
void serialize(Function_ fun)
Definition serialize.hpp:20
std::shared_ptr< const std::vector< Index_ > > VectorPtr
typename std::conditional< oracle_, OracularSparseExtractor< Value_, Index_ >, MyopicSparseExtractor< Value_, Index_ > >::type SparseExtractor
typename std::conditional< oracle_, std::shared_ptr< const Oracle< Index_ > >, bool >::type MaybeOracle
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
void process_consecutive_indices(const Index_ *indices, Index_ length, Function_ fun)
Locking for serial access.
bool sparse_extract_index
bool sparse_extract_value
Options for sparse TileDB extraction.
Definition SparseMatrix.hpp:30
bool require_minimum_cache
Definition SparseMatrix.hpp:47
std::size_t maximum_cache_size
Definition SparseMatrix.hpp:40