52namespace DenseMatrix_internal {
// Short local names for the shared helper types from ::tatami_tiledb::internal,
// used throughout this namespace.
54typedef ::tatami_tiledb::internal::Components Components;
55typedef ::tatami_tiledb::internal::VariablyTypedDimension Dimension;
56typedef ::tatami_tiledb::internal::VariablyTypedVector CacheBuffer;
// Reads one attribute from a TileDB array into a slice of a cache buffer.
//   tdb_comp  - supplies the context and array that the query is built from.
//   subarray  - region to read; applied with set_subarray().
//   attribute - name forwarded to buffer.set_data_buffer().
//   row       - true requests TILEDB_ROW_MAJOR results, false TILEDB_COL_MAJOR.
//   buffer, offset, length - destination registered through set_data_buffer();
//       presumably `offset`/`length` are an element position and count within
//       `buffer` (TODO confirm against VariablyTypedVector).
// Throws std::runtime_error when submit() returns any status other than COMPLETE.
58inline void execute_query(
const Components& tdb_comp,
const tiledb::Subarray& subarray,
const std::string& attribute,
bool row, CacheBuffer& buffer, std::size_t offset, std::size_t length) {
59 tiledb::Query query(tdb_comp.ctx, tdb_comp.array);
60 query.set_subarray(subarray);
61 query.set_layout(row ? TILEDB_ROW_MAJOR : TILEDB_COL_MAJOR);
62 buffer.set_data_buffer(query, attribute, offset, length);
// Any status other than COMPLETE is treated as a hard failure; no retry is attempted here.
63 if (query.submit() != tiledb::Query::Status::COMPLETE) {
64 throw std::runtime_error(
"failed to read dense data from TileDB");
// Cache sizing values handed to the core classes in this namespace.
// NOTE(review): the cores also read `cache_stats.chunk_length`, which is not
// among the lines visible in this excerpt - confirm the full field list.
72template<
typename Index_>
73struct CacheParameters {
// Size of one slab, counted in elements; the cores use it as the spacing
// between consecutive slabs inside their holding buffer.
75 std::size_t slab_size_in_elements;
// Number of slabs; passed to the slab cache constructors and multiplied by the
// slab size to dimension the holding buffer.
76 Index_ max_slabs_in_cache;
// Constructor and data members of the core that caches slabs with an LRU cache.
// NOTE(review): the class header is not visible in this excerpt; presumably this
// is MyopicCore, the type DenseCore selects when no oracle is used.
79template<
typename Index_>
83 const Components& tdb_comp,
84 const std::string& attribute,
86 Index_ target_dim_extent,
87 const Dimension& tdb_target_dim,
88 const Dimension& tdb_non_target_dim,
89 tiledb_datatype_t tdb_type,
90 Index_ non_target_length,
92 const CacheParameters<Index_>& cache_stats
// tdb_comp, attribute and both Dimension arguments are kept as references (see
// the member declarations below), so the caller's objects must outlive this core.
94 my_tdb_comp(tdb_comp),
95 my_attribute(attribute),
97 my_target_dim_extent(target_dim_extent),
98 my_tdb_target_dim(tdb_target_dim),
99 my_tdb_non_target_dim(tdb_non_target_dim),
100 my_non_target_length(non_target_length),
101 my_target_chunk_length(cache_stats.chunk_length),
102 my_slab_size(cache_stats.slab_size_in_elements),
// A single buffer sized for max_slabs_in_cache slabs of my_slab_size elements;
// the size goes through sanisizer::product (presumably overflow-checked - confirm).
103 my_holding(tdb_type, sanisizer::product<std::size_t>(my_slab_size, cache_stats.max_slabs_in_cache)),
104 my_cache(cache_stats.max_slabs_in_cache)
108 const Components& my_tdb_comp;
109 const std::string& my_attribute;
112 Index_ my_target_dim_extent;
113 const Dimension& my_tdb_target_dim;
114 const Dimension& my_tdb_non_target_dim;
116 Index_ my_non_target_length;
117 Index_ my_target_chunk_length;
118 std::size_t my_slab_size;
119 CacheBuffer my_holding;
// Position in my_holding assigned to the next newly created slab; fetch_raw
// advances it by my_slab_size each time it hands one out.
124 std::size_t my_offset = 0;
125 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
// Looks up the slab covering position `i` of the target dimension and copies
// my_non_target_length elements for that position into `buffer`.
// `configure` is called as configure(subarray, rowdex) before the
// target-dimension range is added to the subarray.
// NOTE(review): several lines of this method (including the declaration of
// `rowdex` and the return statement) are not visible in this excerpt.
128 template<
typename Value_,
class Configure_>
129 const Value_* fetch_raw(Index_ i, Value_* buffer, Configure_ configure) {
// Split `i` into the chunk that contains it and its position inside that chunk.
130 Index_ chunk = i / my_target_chunk_length;
131 Index_ index = i % my_target_chunk_length;
// The lambdas passed to find() create and fill slabs; presumably they only run
// on a cache miss (confirm against LruSlabCache::find).
133 const auto& info = my_cache.find(
// New slabs are laid out back to back in my_holding, my_slab_size apart.
137 output.offset = my_offset;
138 my_offset += my_slab_size;
// Fill callback: reads chunk `id` from TileDB into the slab's region of my_holding.
141 [&](Index_ id, Slab& contents) ->
void {
142 Index_ target_start =
id * my_target_chunk_length;
// Clamp the length so the final chunk does not extend past the dimension extent.
143 Index_ target_length = std::min(my_target_dim_extent - target_start, my_target_chunk_length);
146 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
148 configure(subarray, rowdex);
// The target dimension is addressed as `1 - rowdex`, i.e. the opposite index to
// the one handed to `configure`.
149 my_tdb_target_dim.add_range(subarray, 1 - rowdex, target_start, target_length);
150 execute_query(my_tdb_comp, subarray, my_attribute, my_row, my_holding, contents.offset, my_slab_size);
// Within a slab, each target position occupies my_non_target_length consecutive
// elements, so the requested position starts `index` such strides into the slab.
155 auto final_offset = info.offset + sanisizer::product_unsafe<std::size_t>(my_non_target_length, index);
156 my_holding.copy(final_offset, my_non_target_length, buffer);
// Block-style fetch for position `i`. The visible part builds a callback that
// adds one range on the non-target dimension, with arguments
// (block_start, my_non_target_length) - presumably a start and a length.
// NOTE(review): the call that receives this lambda is not visible in this
// excerpt; presumably it is fetch_raw.
161 template<
typename Value_>
162 const Value_* fetch_block(Index_ i, Index_ block_start, Value_* buffer) {
166 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
167 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, my_non_target_length);
// Index-style fetch for position `i`. The visible part builds a callback that
// adds ranges on the non-target dimension; the inner lambda adds one range per
// (s, l) pair it is given.
// NOTE(review): the code that invokes the inner lambda is not visible;
// presumably a helper splits `indices` into contiguous (start, length) runs.
172 template<
typename Value_>
173 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
177 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
181 [&](Index_ s, Index_ l) ->
void {
182 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Constructor and data members of the core that caches slabs with an
// oracle-driven cache.
// NOTE(review): the class header and the `oracle` parameter declaration are not
// visible in this excerpt; presumably this is OracularCore, the type DenseCore
// selects when an oracle is used.
190template<
typename Index_>
194 const Components& tdb_comp,
195 const std::string& attribute,
197 Index_ target_dim_extent,
198 const Dimension& tdb_target_dim,
199 const Dimension& tdb_non_target_dim,
200 tiledb_datatype_t tdb_type,
201 Index_ non_target_length,
203 const CacheParameters<Index_>& cache_stats
// tdb_comp, attribute and both Dimension arguments are kept as references (see
// the member declarations below), so the caller's objects must outlive this core.
205 my_tdb_comp(tdb_comp),
206 my_attribute(attribute),
208 my_target_dim_extent(target_dim_extent),
209 my_tdb_target_dim(tdb_target_dim),
210 my_tdb_non_target_dim(tdb_non_target_dim),
211 my_non_target_length(non_target_length),
212 my_target_chunk_length(cache_stats.chunk_length),
213 my_slab_size(cache_stats.slab_size_in_elements),
// A single buffer sized for max_slabs_in_cache slabs of my_slab_size elements;
// the size goes through sanisizer::product (presumably overflow-checked - confirm).
214 my_holding(tdb_type, sanisizer::product<std::size_t>(my_slab_size, cache_stats.max_slabs_in_cache)),
// The oracle is moved into the cache, which then owns it.
215 my_cache(std::move(oracle), cache_stats.max_slabs_in_cache)
219 const Components& my_tdb_comp;
220 const std::string& my_attribute;
223 Index_ my_target_dim_extent;
224 const Dimension& my_tdb_target_dim;
225 const Dimension& my_tdb_non_target_dim;
227 Index_ my_non_target_length;
228 Index_ my_target_chunk_length;
229 std::size_t my_slab_size;
230 CacheBuffer my_holding;
// Position in my_holding assigned to the next newly created slab; fetch_raw
// advances it by my_slab_size each time it hands one out.
235 std::size_t my_offset = 0;
236 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, true> my_cache;
// Orders `indices` in place so that field(element) is ascending.
// `field` is called on each (id, slab pointer) pair and its results are
// compared with operator<.
239 template<
class Function_>
240 static void sort_by_field(std::vector<std::pair<Index_, Slab*> >& indices, Function_ field) {
241 auto comp = [&field](
const std::pair<Index_, Slab*>& l,
const std::pair<Index_, Slab*>& r) ->
bool {
242 return field(l) < field(r);
// Check first so that input which is already ordered skips the sort entirely.
244 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
245 std::sort(indices.begin(), indices.end(), comp);
// Oracle-driven counterpart of the LRU fetch: obtains the next slab from the
// oracular cache and copies my_non_target_length elements into `buffer`.
// `i` is marked [[maybe_unused]]; the position comes from the cache's next()
// call via the `current` argument of the first callback.
// `configure` is called as configure(subarray, rowdex) to add the non-target ranges.
// NOTE(review): several lines of this method (including the declaration of
// `rowdex`, some closing braces and the return statement) are not visible here.
249 template<
typename Value_,
class Configure_>
250 const Value_* fetch_raw([[maybe_unused]] Index_ i, Value_* buffer, Configure_ configure) {
251 auto info = my_cache.next(
// Identify callback: maps a position to (chunk id, position within the chunk).
252 [&](Index_ current) -> std::pair<Index_, Index_> {
253 return std::pair<Index_, Index_>(current / my_target_chunk_length, current % my_target_chunk_length);
// New slabs are laid out back to back in my_holding, my_slab_size apart.
257 output.offset = my_offset;
258 my_offset += my_slab_size;
// Populate callback: `to_reuse` holds slabs that stay cached, `to_populate`
// holds slabs that must be read from TileDB.
261 [&](std::vector<std::pair<Index_, Slab*> >& to_populate, std::vector<std::pair<Index_, Slab*> >& to_reuse) ->
void {
// Compact the reused slabs to the front of my_holding. They are processed in
// ascending offset order, presumably so that a shift never overwrites a slab
// that has not been moved yet.
264 sort_by_field(to_reuse, [](
const std::pair<Index_, Slab*>& x) -> std::size_t {
return x.second->offset; });
265 std::size_t running_offset = 0;
266 for (
auto& x : to_reuse) {
267 auto& cur_offset = x.second->offset;
268 if (cur_offset != running_offset) {
269 my_holding.shift(cur_offset, my_slab_size, running_offset);
270 cur_offset = running_offset;
272 running_offset += my_slab_size;
// Order the slabs to be loaded by chunk id so that neighbouring chunks can be
// requested as a single range.
280 sort_by_field(to_populate, [](
const std::pair<Index_, Slab*>& x) -> Index_ {
return x.first; });
283 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
285 configure(subarray, rowdex);
// The first chunk opens the first run. front() is called without an emptiness
// check here, so this relies on to_populate being non-empty.
292 Index_ run_chunk_id = to_populate.front().first;
293 Index_ run_chunk_start = run_chunk_id * my_target_chunk_length;
// Clamp the length so the final chunk does not extend past the dimension extent.
294 Index_ run_length = std::min(my_target_dim_extent - run_chunk_start, my_target_chunk_length);
// New slabs are placed directly after the compacted reused slabs.
296 to_populate.front().second->offset = running_offset;
297 auto start_offset = running_offset;
298 running_offset += my_slab_size;
300 int dimdex = 1 - rowdex;
301 for (
size_t ci = 1, cend = to_populate.size(); ci < cend; ++ci) {
302 auto& current_chunk = to_populate[ci];
303 Index_ current_chunk_id = current_chunk.first;
304 Index_ current_chunk_start = current_chunk_id * my_target_chunk_length;
// A gap in chunk ids closes the current run: its range is added and a new run starts.
// NOTE(review): in the visible lines run_chunk_id is only updated inside this
// branch, so the comparison is against the first chunk of the run rather than
// the previous chunk; a third consecutive chunk would then open a new run.
// Confirm against the full file whether that is intended, and whether
// run_length is reset on a line not shown here.
306 if (current_chunk_id - run_chunk_id > 1) {
307 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
308 run_chunk_id = current_chunk_id;
309 run_chunk_start = current_chunk_start;
313 Index_ current_length = std::min(my_target_dim_extent - current_chunk_start, my_target_chunk_length);
314 run_length += current_length;
315 current_chunk.second->offset = running_offset;
316 running_offset += my_slab_size;
// Add the range for the last open run, then read every new slab with one query
// into the region of my_holding that starts at start_offset.
319 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
320 execute_query(my_tdb_comp, subarray, my_attribute, my_row, my_holding, start_offset, running_offset - start_offset);
// info.first is the slab and info.second the position within its chunk; each
// position occupies my_non_target_length consecutive elements of the slab.
325 auto final_offset = info.first->offset + sanisizer::product_unsafe<std::size_t>(my_non_target_length, info.second);
326 my_holding.copy(final_offset, my_non_target_length, buffer);
// Block-style fetch for position `i`. The visible part builds a callback that
// adds one range on the non-target dimension, with arguments
// (block_start, my_non_target_length) - presumably a start and a length.
// NOTE(review): the call that receives this lambda is not visible in this
// excerpt; presumably it is fetch_raw.
331 template<
typename Value_>
332 const Value_* fetch_block(Index_ i, Index_ block_start, Value_* buffer) {
336 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
337 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, my_non_target_length);
// Index-style fetch for position `i`. The visible part builds a callback that
// adds ranges on the non-target dimension; the inner lambda adds one range per
// (s, l) pair it is given.
// NOTE(review): the code that invokes the inner lambda is not visible;
// presumably a helper splits `indices` into contiguous (start, length) runs.
342 template<
typename Value_>
343 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
347 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
351 [&](Index_ s, Index_ l) ->
void {
352 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Compile-time choice of core implementation: OracularCore<Index_> when
// oracle_ is true, MyopicCore<Index_> otherwise.
360template<
bool oracle_,
typename Index_>
361using DenseCore =
typename std::conditional<oracle_, OracularCore<Index_>, MyopicCore<Index_> >::type;
// Extractor over the whole non-target dimension: fetch() forwards to the core's
// fetch_block() with a block start of 0.
// NOTE(review): the class header, base class and member-initializer list are not
// visible in this excerpt; presumably this is the `Full` extractor that
// DenseMatrix::dense() instantiates.
367template<
bool oracle_,
typename Value_,
typename Index_>
371 const Components& tdb_comp,
372 const std::string& attribute,
374 Index_ target_dim_extent,
375 const Dimension& tdb_target_dim,
376 const Dimension& tdb_non_target_dim,
377 tiledb_datatype_t tdb_type,
379 Index_ non_target_dim,
380 const CacheParameters<Index_>& cache_stats) :
395 const Value_* fetch(Index_ i, Value_* buffer) {
396 return my_core.fetch_block(i, 0, buffer);
400 DenseCore<oracle_, Index_> my_core;
// Extractor over a contiguous block of the non-target dimension: the block start
// is stored at construction and fetch() forwards it to the core's fetch_block().
// NOTE(review): the class header, base class and part of the parameter list
// (including `block_start`) are not visible in this excerpt; presumably this is
// the `Block` extractor that DenseMatrix::dense() instantiates.
403template<
bool oracle_,
typename Value_,
typename Index_>
407 const Components& tdb_comp,
408 const std::string& attribute,
410 Index_ target_dim_extent,
411 const Dimension& tdb_target_dim,
412 const Dimension& tdb_non_target_dim,
413 tiledb_datatype_t tdb_type,
417 const CacheParameters<Index_>& cache_stats) :
430 my_block_start(block_start)
433 const Value_* fetch(Index_ i, Value_* buffer) {
434 return my_core.fetch_block(i, my_block_start, buffer);
438 DenseCore<oracle_, Index_> my_core;
439 Index_ my_block_start;
// Extractor over an arbitrary set of non-target indices: the index pointer is
// moved into a member at construction and fetch() dereferences it for the core's
// fetch_indices().
// NOTE(review): the class header, base class, the `indices_ptr` parameter and
// the my_indices_ptr declaration are not visible in this excerpt; presumably
// this is the `Index` extractor that DenseMatrix::dense() instantiates.
442template<
bool oracle_,
typename Value_,
typename Index_>
446 const Components& tdb_comp,
447 const std::string& attribute,
449 Index_ target_dim_extent,
450 const Dimension& tdb_target_dim,
451 const Dimension& tdb_non_target_dim,
452 tiledb_datatype_t tdb_type,
455 const CacheParameters<Index_>& cache_stats) :
468 my_indices_ptr(std::move(indices_ptr))
471 const Value_* fetch(Index_ i, Value_* buffer) {
472 return my_core.fetch_indices(i, *my_indices_ptr, buffer);
476 DenseCore<oracle_, Index_> my_core;
// Start of the matrix class template and the bodies of two construction paths.
// NOTE(review): the class header and constructor signatures are not visible in
// this excerpt; presumably this is DenseMatrix.
499template<
typename Value_,
typename Index_>
// Path 1: a caller-supplied context is moved into initialize().
509 initialize(uri, std::move(ctx), options);
// Path 2: `false` is passed in the context slot; initialize() takes its
// non-tiledb::Context branch for any such argument.
518 initialize(uri,
false, options);
// Opens the TileDB array at `uri`, validates its schema and records the values
// needed for extraction.
// Throws std::runtime_error if the array is not dense, lacks my_attribute, or
// does not have exactly two dimensions.
// NOTE(review): some lines are not visible in this excerpt, including how
// my_tdb_comp is assigned and the body of the deleter lambda.
528 template<
class PossibleContext_>
529 void initialize(
const std::string& uri, PossibleContext_ ctx,
const DenseMatrixOptions& options) {
// Compile-time choice: use the supplied tiledb::Context when one was given,
// otherwise build the Components from the URI alone.
535 if constexpr(std::is_same<PossibleContext_, tiledb::Context>::value) {
536 return new DenseMatrix_internal::Components(std::move(ctx), uri);
538 return new DenseMatrix_internal::Components(uri);
// Custom deleter for the Components pointer (body not shown here).
541 [](DenseMatrix_internal::Components* ptr) ->
void {
549 auto schema = my_tdb_comp->array.schema();
550 if (schema.array_type() != TILEDB_DENSE) {
551 throw std::runtime_error(
"TileDB array should be dense");
554 if (!schema.has_attribute(my_attribute)) {
555 throw std::runtime_error(
"no attribute '" + my_attribute +
"' is present in the TileDB array");
557 auto attr = schema.attribute(my_attribute);
558 my_tdb_type = attr.type();
// Convert the configured cache size into a count of elements of the attribute's
// type (maximum_cache_size is presumably in bytes - confirm). Integer division
// rounds down.
560 my_cache_size_in_elements = options.
maximum_cache_size / internal::determine_type_size(my_tdb_type);
563 tiledb::Domain domain = schema.domain();
564 if (domain.ndim() != 2) {
565 throw std::runtime_error(
"TileDB array should have exactly two dimensions");
// Record extent and tile length for each of the two dimensions.
568 tiledb::Dimension first_dim = domain.dimension(0);
569 my_tdb_first_dim.reset(first_dim);
570 Index_ first_extent = my_tdb_first_dim.extent<Index_>();
571 Index_ first_tile = my_tdb_first_dim.tile<Index_>();
572 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(first_extent, first_tile);
574 tiledb::Dimension second_dim = domain.dimension(1);
575 my_tdb_second_dim.reset(second_dim);
576 Index_ second_extent = my_tdb_second_dim.extent<Index_>();
577 Index_ second_tile = my_tdb_second_dim.tile<Index_>();
578 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(second_extent, second_tile);
// Ceiling division: tiles_per_firstdim counts tiles along the second dimension
// and tiles_per_seconddim counts tiles along the first (presumably the tiles
// crossed when reading one element of the named dimension). Ties go to the
// first dimension.
581 auto tiles_per_firstdim = (second_extent / second_tile) + (second_extent % second_tile > 0);
582 auto tiles_per_seconddim = (first_extent / first_tile) + (first_extent % first_tile > 0);
583 my_prefer_firstdim = tiles_per_firstdim <= tiles_per_seconddim;
// Shared handle to the TileDB components; initialize() reads the array schema through it.
588 std::shared_ptr<DenseMatrix_internal::Components> my_tdb_comp;
// Wrappers for the array's dimensions 0 and 1, set by reset() in initialize().
590 DenseMatrix_internal::Dimension my_tdb_first_dim, my_tdb_second_dim;
// Datatype of the attribute being read.
591 tiledb_datatype_t my_tdb_type;
593 std::string my_attribute;
// Cache capacity as an element count, derived from options.maximum_cache_size in initialize().
594 std::size_t my_cache_size_in_elements;
// Forwarded to SlabCacheStats in populate(); its initialization is not visible in this excerpt.
595 bool my_require_minimum_cache;
// NOTE(review): no use of these two offsets is visible in this excerpt.
597 int my_first_offset, my_second_offset;
// Extent and chunk statistics per dimension, built from each dimension's extent and tile length.
598 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
// True when initialize() found no more tiles per first-dimension element than per second-dimension element.
599 bool my_prefer_firstdim;
// Row count: the extent of the first TileDB dimension.
602 Index_ nrow_internal()
const {
603 return my_firstdim_stats.dimension_extent;
// Column count: the extent of the second TileDB dimension.
606 Index_ ncol_internal()
const {
607 return my_seconddim_stats.dimension_extent;
// Public dimension getters; they forward to the *_internal() versions above.
611 Index_ nrow()
const {
612 return nrow_internal();
615 Index_ ncol()
const {
616 return ncol_internal();
// NOTE(review): the bodies of is_sparse() and is_sparse_proportion() are not
// visible in this excerpt.
619 bool is_sparse()
const {
623 double is_sparse_proportion()
const {
// Row access is preferred exactly when initialize() chose the first dimension.
627 bool prefer_rows()
const {
628 return my_prefer_firstdim;
// The same flag as a proportion: 1.0 when rows are preferred, 0.0 otherwise.
632 return static_cast<double>(my_prefer_firstdim);
// Reports oracle use whenever the cache holds at least one element; the bool
// argument is unnamed and unused.
635 bool uses_oracle(
bool)
const {
638 return my_cache_size_in_elements > 0;
// Shared factory behind every dense() overload: selects the target and
// non-target dimension by `row`, works out the cache parameters, and constructs
// an Extractor_<oracle_, Value_, Index_> with the extra `args` forwarded.
// NOTE(review): several constructor arguments and the `else` line of the cache
// parameter branch are not visible in this excerpt.
642 template<
bool oracle_,
template<
bool,
typename,
typename>
class Extractor_,
typename ... Args_>
643 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate(
bool row, Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> oracle, Args_&& ... args)
const {
// For row access the first dimension is the target; otherwise the second is.
644 const auto& target_dim_stats = (row ? my_firstdim_stats : my_seconddim_stats);
645 const auto& tdb_target_dim = (row ? my_tdb_first_dim : my_tdb_second_dim);
646 const auto& tdb_non_target_dim = (row ? my_tdb_second_dim : my_tdb_first_dim);
648 tatami_chunked::SlabCacheStats<Index_> slab_stats(
649 target_dim_stats.chunk_length,
651 target_dim_stats.num_chunks,
652 my_cache_size_in_elements,
653 my_require_minimum_cache
661 DenseMatrix_internal::CacheParameters<Index_> cache_params;
// When at least one slab fits in the cache, use the computed slab statistics.
662 if (slab_stats.max_slabs_in_cache > 0) {
663 cache_params.chunk_length = target_dim_stats.chunk_length;
664 cache_params.slab_size_in_elements = slab_stats.slab_size_in_elements;
665 cache_params.max_slabs_in_cache = slab_stats.max_slabs_in_cache;
// Otherwise (presumably the else branch) fall back to one slab that holds a
// single target position: chunk length 1, non_target_length elements.
667 cache_params.chunk_length = 1;
668 cache_params.slab_size_in_elements = non_target_length;
669 cache_params.max_slabs_in_cache = 1;
672 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
676 target_dim_stats.dimension_extent,
681 std::forward<Args_>(args)...,
// Myopic (non-oracle) dense extractors. Each overload calls populate<false, ...>
// and passes `false` in the oracle slot.
// Full extent: the non-target length is the whole opposite dimension.
690 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options&)
const {
691 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
692 return populate<false, DenseMatrix_internal::Full>(row, full_non_target,
false, full_non_target);
// Contiguous block: the non-target length is block_length.
695 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options&)
const {
696 return populate<false, DenseMatrix_internal::Block>(row, block_length,
false, block_start, block_length);
// Indexed subset (signature not visible in this excerpt): the non-target length
// is the number of indices, read before the pointer is moved.
700 auto nidx = indices_ptr->size();
701 return populate<false, DenseMatrix_internal::Index>(row, nidx,
false, std::move(indices_ptr));
// Myopic sparse extractors: each builds the matching dense extractor and wraps
// it in the corresponding tatami *SparsifiedWrapper.
708 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
709 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
710 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), full_non_target, opt);
713 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
714 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
// Indexed subset (signature not visible in this excerpt): dense() receives
// indices_ptr without a move, so the same pointer can then be moved into the wrapper.
718 auto ptr = dense(row, indices_ptr, opt);
719 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
// Oracular dense extractors. Each overload calls populate<true, ...> and moves
// the oracle into it.
// NOTE(review): the parameter lists of all three overloads are not visible in
// this excerpt.
// Full extent: the non-target length is the whole opposite dimension.
726 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
731 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
732 return populate<true, DenseMatrix_internal::Full>(row, full_non_target, std::move(oracle), full_non_target);
// Contiguous block: the non-target length is block_length.
735 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
742 return populate<true, DenseMatrix_internal::Block>(row, block_length, std::move(oracle), block_start, block_length);
// Indexed subset: the number of indices is read before the pointer is moved.
745 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
751 auto nidx = indices_ptr->size();
752 return populate<true, DenseMatrix_internal::Index>(row, nidx, std::move(oracle), std::move(indices_ptr));
// Oracular sparse extractors: each builds the matching oracular dense extractor
// (moving the oracle into it) and wraps it in the corresponding tatami
// *SparsifiedWrapper.
// NOTE(review): the parameter lists of all three overloads are not visible in
// this excerpt.
759 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
764 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
765 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), opt), full_non_target, opt);
768 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
775 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), block_start, block_length, opt), block_start, block_length, opt);
778 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
// dense() receives indices_ptr without a move, so the same pointer can then be
// moved into the wrapper.
784 auto ptr = dense(row, std::move(oracle), indices_ptr, opt);
785 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);