50namespace DenseMatrix_internal {
// Short local aliases for helper types declared in ::tatami_tiledb::internal, so the
// rest of this namespace can name them without the full qualification.
52typedef ::tatami_tiledb::internal::Components Components;
53typedef ::tatami_tiledb::internal::VariablyTypedDimension Dimension;
// CacheBuffer is the type of the `my_holding` buffers used by the core classes below.
54typedef ::tatami_tiledb::internal::VariablyTypedVector CacheBuffer;
// Runs one read query against the array in `tdb_comp` and throws if it does not complete.
//
// tdb_comp:  supplies the context (`ctx`) and the array (`array`) the query is built on.
// subarray:  region of the array to read.
// attribute: attribute name, forwarded to buffer.set_data_buffer().
// row:       selects the result layout (row-major when true, column-major otherwise).
// buffer:    destination buffer; `offset` and `length` are forwarded to set_data_buffer()
//            (presumably the element range of `buffer` that receives the data -
//            TODO confirm against VariablyTypedVector).
//
// Throws std::runtime_error when query.submit() returns any status other than COMPLETE.
56inline void execute_query(
const Components& tdb_comp,
const tiledb::Subarray& subarray,
const std::string& attribute,
bool row, CacheBuffer& buffer,
size_t offset,
size_t length) {
57 tiledb::Query query(tdb_comp.ctx, tdb_comp.array);
58 query.set_subarray(subarray);
// Layout follows `row`: row-major when true, column-major otherwise.
59 query.set_layout(row ? TILEDB_ROW_MAJOR : TILEDB_COL_MAJOR);
60 buffer.set_data_buffer(query, attribute, offset, length);
// Any status other than COMPLETE is reported as a failed read.
61 if (query.submit() != tiledb::Query::Status::COMPLETE) {
62 throw std::runtime_error(
"failed to read dense data from TileDB");
// Cache sizing parameters handed to the core classes' constructors.
// NOTE(review): those constructors also read `cache_stats.chunk_length`; the declaration
// of that member is not among the lines shown in this excerpt.
70template<
typename Index_>
71struct CacheParameters {
// Number of elements set aside for each slab in the holding buffer.
73 size_t slab_size_in_elements;
// Number of slabs the cache may hold; the cores size their holding buffer as
// slab_size_in_elements * max_slabs_in_cache.
74 size_t max_slabs_in_cache;
// Constructor and data members of the core that serves requests without an oracle
// (presumably `MyopicCore`, the non-oracle alternative named by `DenseCore` further down;
// the class header and a few parameter/member lines are not part of this excerpt).
//
// The components, the attribute name and both dimension objects are stored as references
// (see the member declarations), so the objects passed in must outlive this instance.
77template<
typename Index_>
81 const Components& tdb_comp,
82 const std::string& attribute,
84 Index_ target_dim_extent,
85 const Dimension& tdb_target_dim,
86 const Dimension& tdb_non_target_dim,
87 tiledb_datatype_t tdb_type,
88 Index_ non_target_length,
90 const CacheParameters<Index_>& cache_stats) :
91 my_tdb_comp(tdb_comp),
92 my_attribute(attribute),
94 my_target_dim_extent(target_dim_extent),
95 my_tdb_target_dim(tdb_target_dim),
96 my_tdb_non_target_dim(tdb_non_target_dim),
97 my_non_target_length(non_target_length),
98 my_target_chunk_length(cache_stats.chunk_length),
99 my_slab_size(cache_stats.slab_size_in_elements),
// A single buffer backs all slabs: (elements per slab) x (maximum number of slabs).
100 my_holding(tdb_type, my_slab_size * cache_stats.max_slabs_in_cache),
101 my_cache(cache_stats.max_slabs_in_cache)
// References to caller-provided objects.
105 const Components& my_tdb_comp;
106 const std::string& my_attribute;
// Extent of the target dimension; used to clip the length of the last chunk.
109 Index_ my_target_dim_extent;
110 const Dimension& my_tdb_target_dim;
111 const Dimension& my_tdb_non_target_dim;
// Number of non-target elements copied out per fetch.
113 Index_ my_non_target_length;
// Number of target-dimension elements per chunk.
114 Index_ my_target_chunk_length;
// Storage for the contents of every cached slab.
116 CacheBuffer my_holding;
// Position in my_holding that is assigned to the next slab that gets an offset.
121 size_t my_offset = 0;
122 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
// Fetches the data for target-dimension element `i`, going through the slab cache.
//
// i:         index along the target dimension.
// buffer:    destination handed to my_holding.copy() for my_non_target_length elements.
// configure: callable invoked as configure(subarray, rowdex) before the target-dimension
//            range is added; the callers in this class use it to add the non-target ranges.
//
// NOTE(review): the return statement, the definition of `rowdex` and several
// opening/closing lines of the lambdas are not part of this excerpt.
125 template<
typename Value_,
class Configure_>
126 const Value_* fetch_raw(Index_ i, Value_* buffer, Configure_ configure) {
// Split `i` into the chunk that contains it and its position inside that chunk.
127 Index_ chunk = i / my_target_chunk_length;
128 Index_ index = i % my_target_chunk_length;
130 const auto& info = my_cache.find(
// Presumably the cache's slab-creation callback (its opening line is not shown):
// the slab is given the next unused region of my_holding.
134 output.offset = my_offset;
135 my_offset += my_slab_size;
// Slab-filling callback: read chunk `id` from the array into the slab's region.
138 [&](Index_ id, Slab& contents) ->
void {
139 Index_ target_start =
id * my_target_chunk_length;
// The last chunk may be shorter than the others, so clip to the dimension extent.
140 Index_ target_length = std::min(my_target_dim_extent - target_start, my_target_chunk_length);
143 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
145 configure(subarray, rowdex);
// The target dimension is the one not used by `configure`, hence `1 - rowdex`.
146 my_tdb_target_dim.add_range(subarray, 1 - rowdex, target_start, target_length);
147 execute_query(my_tdb_comp, subarray, my_attribute, my_row, my_holding, contents.offset, my_slab_size);
// Within a slab, each target element occupies my_non_target_length consecutive entries;
// step to the entry for position `index` and copy it out.
152 size_t final_offset = info.offset +
static_cast<size_t>(my_non_target_length) *
static_cast<size_t>(index);
153 my_holding.copy(final_offset, my_non_target_length, buffer);
// Fetches target element `i` restricted to one contiguous block of the non-target
// dimension. Delegates to fetch_raw(); the callback adds a single non-target range that
// starts at `block_start` and uses my_non_target_length as its length argument.
158 template<
typename Value_>
159 const Value_* fetch_block(Index_ i, Index_ block_start, Value_* buffer) {
160 return fetch_raw(i, buffer, [&](tiledb::Subarray& subarray,
int rowdex) {
161 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, my_non_target_length);
// Fetches target element `i` restricted to the non-target positions in `indices`.
// Delegates to fetch_raw(); the callback adds non-target ranges to the subarray.
// NOTE(review): `s` and `l` are defined on a line that is not part of this excerpt;
// presumably the start and length of each run of consecutive indices - TODO confirm.
165 template<
typename Value_>
166 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
167 return fetch_raw(i, buffer, [&](tiledb::Subarray& subarray,
int rowdex) {
169 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Constructor and data members of the core that is driven by an oracle (presumably
// `OracularCore`, the oracle alternative named by `DenseCore` further down; the class
// header and a few parameter/member lines, including the `oracle` parameter that is
// moved into the cache, are not part of this excerpt).
//
// The components, the attribute name and both dimension objects are stored as references
// (see the member declarations), so the objects passed in must outlive this instance.
175template<
typename Index_>
179 const Components& tdb_comp,
180 const std::string& attribute,
182 Index_ target_dim_extent,
183 const Dimension& tdb_target_dim,
184 const Dimension& tdb_non_target_dim,
185 tiledb_datatype_t tdb_type,
186 Index_ non_target_length,
188 const CacheParameters<Index_>& cache_stats) :
189 my_tdb_comp(tdb_comp),
190 my_attribute(attribute),
192 my_target_dim_extent(target_dim_extent),
193 my_tdb_target_dim(tdb_target_dim),
194 my_tdb_non_target_dim(tdb_non_target_dim),
195 my_non_target_length(non_target_length),
196 my_target_chunk_length(cache_stats.chunk_length),
197 my_slab_size(cache_stats.slab_size_in_elements),
// A single buffer backs all slabs: (elements per slab) x (maximum number of slabs).
198 my_holding(tdb_type, my_slab_size * cache_stats.max_slabs_in_cache),
// The cache takes over the oracle and is limited to max_slabs_in_cache slabs.
199 my_cache(std::move(oracle), cache_stats.max_slabs_in_cache)
// References to caller-provided objects.
203 const Components& my_tdb_comp;
204 const std::string& my_attribute;
// Extent of the target dimension; used to clip the length of the last chunk.
207 Index_ my_target_dim_extent;
208 const Dimension& my_tdb_target_dim;
209 const Dimension& my_tdb_non_target_dim;
// Number of non-target elements copied out per fetch.
211 Index_ my_non_target_length;
// Number of target-dimension elements per chunk.
212 Index_ my_target_chunk_length;
// Storage for the contents of every cached slab.
214 CacheBuffer my_holding;
// Position in my_holding that is assigned to the next slab that gets an offset.
219 size_t my_offset = 0;
220 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, true> my_cache;
// Sorts `indices` in place into increasing order of `field(element)`.
//
// indices: (chunk id, slab pointer) pairs to reorder.
// field:   callable that extracts the sort key from one pair; keys are compared with `<`.
//
// The std::is_sorted check skips the call to std::sort when the input is already ordered.
223 template<
class Function_>
224 static void sort_by_field(std::vector<std::pair<Index_, Slab*> >& indices, Function_ field) {
225 auto comp = [&field](
const std::pair<Index_, Slab*>& l,
const std::pair<Index_, Slab*>& r) ->
bool {
226 return field(l) < field(r);
228 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
229 std::sort(indices.begin(), indices.end(), comp);
// Fetches the data for the next element supplied by my_cache.next(), with the cache
// contents being (re)populated in bulk through the callbacks passed to it.
//
// i:         not used by the lines shown (marked [[maybe_unused]]).
// buffer:    destination handed to my_holding.copy() for my_non_target_length elements.
// configure: callable invoked as configure(subarray, rowdex) before the target-dimension
//            ranges are added; the callers in this class use it to add the non-target ranges.
//
// NOTE(review): several lines of this function (the definition of `rowdex`, the return
// statement, and opening/closing lines of some lambdas and blocks) are not part of
// this excerpt.
233 template<
typename Value_,
class Configure_>
234 const Value_* fetch_raw([[maybe_unused]] Index_ i, Value_* buffer, Configure_ configure) {
235 auto info = my_cache.next(
// Maps a target index to (chunk id, position within that chunk).
236 [&](Index_ current) -> std::pair<Index_, Index_> {
237 return std::pair<Index_, Index_>(current / my_target_chunk_length, current % my_target_chunk_length);
// Presumably the cache's slab-creation callback (its opening line is not shown):
// the slab is given the next unused region of my_holding.
241 output.offset = my_offset;
242 my_offset += my_slab_size;
// Bulk population callback. As used below, slabs in `to_reuse` keep their contents and
// are only moved, while the (chunk id, slab) pairs in `to_populate` are read from the array.
245 [&](std::vector<std::pair<Index_, Slab*> >& to_populate, std::vector<std::pair<Index_, Slab*> >& to_reuse) {
// Pack the reused slabs at the front of my_holding, visiting them in increasing
// offset order, so the newly read slabs can be placed contiguously after them.
248 sort_by_field(to_reuse, [](
const std::pair<Index_, Slab*>& x) ->
size_t {
return x.second->offset; });
249 size_t running_offset = 0;
250 for (
auto& x : to_reuse) {
251 auto& cur_offset = x.second->offset;
252 if (cur_offset != running_offset) {
253 my_holding.shift(cur_offset, my_slab_size, running_offset);
254 cur_offset = running_offset;
256 running_offset += my_slab_size;
// Order the chunks to read by chunk id so neighbouring chunks can share one range.
264 sort_by_field(to_populate, [](
const std::pair<Index_, Slab*>& x) -> Index_ {
return x.first; });
267 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
269 configure(subarray, rowdex);
// Start the first run with the first chunk; its length is clipped to the dimension extent.
276 Index_ run_chunk_id = to_populate.front().first;
277 Index_ run_chunk_start = run_chunk_id * my_target_chunk_length;
278 Index_ run_length = std::min(my_target_dim_extent - run_chunk_start, my_target_chunk_length);
280 to_populate.front().second->offset = running_offset;
281 auto start_offset = running_offset;
282 running_offset += my_slab_size;
// The target dimension is the one not used by `configure`.
284 int dimdex = 1 - rowdex;
285 for (
size_t ci = 1, cend = to_populate.size(); ci < cend; ++ci) {
286 auto& current_chunk = to_populate[ci];
287 Index_ current_chunk_id = current_chunk.first;
288 Index_ current_chunk_start = current_chunk_id * my_target_chunk_length;
// A gap in chunk ids ends the current run: emit it as one range and begin a new run.
// NOTE(review): in the lines shown, run_chunk_id changes only when a new run starts
// and no reset of run_length is visible; confirm against the full file that gap
// detection and run_length behave as intended for runs longer than two chunks.
290 if (current_chunk_id - run_chunk_id > 1) {
291 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
292 run_chunk_id = current_chunk_id;
293 run_chunk_start = current_chunk_start;
// Extend the current run by this chunk and assign the chunk's slab its region.
297 Index_ current_length = std::min(my_target_dim_extent - current_chunk_start, my_target_chunk_length);
298 run_length += current_length;
299 current_chunk.second->offset = running_offset;
300 running_offset += my_slab_size;
// Emit the final run, then read everything with one query into the region of
// my_holding that spans from start_offset up to running_offset.
303 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
304 execute_query(my_tdb_comp, subarray, my_attribute, my_row, my_holding, start_offset, running_offset - start_offset);
// info.first is the slab, info.second the position within its chunk; each position
// occupies my_non_target_length consecutive entries of the slab.
309 size_t final_offset = info.first->offset + my_non_target_length *
static_cast<size_t>(info.second);
310 my_holding.copy(final_offset, my_non_target_length, buffer);
// Fetches the next element restricted to one contiguous block of the non-target
// dimension. Delegates to fetch_raw(); the callback adds a single non-target range that
// starts at `block_start` and uses my_non_target_length as its length argument.
315 template<
typename Value_>
316 const Value_* fetch_block(Index_ i, Index_ block_start, Value_* buffer) {
317 return fetch_raw(i, buffer, [&](tiledb::Subarray& subarray,
int rowdex) {
318 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, my_non_target_length);
// Fetches the next element restricted to the non-target positions in `indices`.
// Delegates to fetch_raw(); the callback adds non-target ranges to the subarray.
// NOTE(review): `s` and `l` are defined on a line that is not part of this excerpt;
// presumably the start and length of each run of consecutive indices - TODO confirm.
322 template<
typename Value_>
323 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
324 return fetch_raw(i, buffer, [&](tiledb::Subarray& subarray,
int rowdex) {
326 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// Compile-time selection of the core implementation: OracularCore<Index_> when
// `oracle_` is true, MyopicCore<Index_> otherwise.
332template<
bool oracle_,
typename Index_>
333using DenseCore =
typename std::conditional<oracle_, OracularCore<Index_>, MyopicCore<Index_> >::type;
// Extractor that fetches target elements across the non-target dimension starting at
// position 0 (presumably `DenseMatrix_internal::Full`, which populate() is instantiated
// with; the class header, part of the constructor and its initializer list are not
// part of this excerpt).
339template<
bool oracle_,
typename Value_,
typename Index_>
343 const Components& tdb_comp,
344 const std::string& attribute,
346 Index_ target_dim_extent,
347 const Dimension& tdb_target_dim,
348 const Dimension& tdb_non_target_dim,
349 tiledb_datatype_t tdb_type,
351 Index_ non_target_dim,
352 const CacheParameters<Index_>& cache_stats) :
// Delegates to the core with a block that starts at position 0.
367 const Value_* fetch(Index_ i, Value_* buffer) {
368 return my_core.fetch_block(i, 0, buffer);
372 DenseCore<oracle_, Index_> my_core;
// Extractor that fetches target elements restricted to a contiguous block of the
// non-target dimension (presumably `DenseMatrix_internal::Block`, which populate() is
// instantiated with; the class header, some constructor parameters and most of the
// initializer list are not part of this excerpt).
375template<
bool oracle_,
typename Value_,
typename Index_>
379 const Components& tdb_comp,
380 const std::string& attribute,
382 Index_ target_dim_extent,
383 const Dimension& tdb_target_dim,
384 const Dimension& tdb_non_target_dim,
385 tiledb_datatype_t tdb_type,
389 const CacheParameters<Index_>& cache_stats) :
402 my_block_start(block_start)
// Delegates to the core, passing the block start remembered at construction.
405 const Value_* fetch(Index_ i, Value_* buffer) {
406 return my_core.fetch_block(i, my_block_start, buffer);
410 DenseCore<oracle_, Index_> my_core;
// First non-target position of the block.
411 Index_ my_block_start;
// Extractor that fetches target elements restricted to a set of non-target indices
// (presumably `DenseMatrix_internal::Index`, which populate() is instantiated with; the
// class header, some constructor parameters, most of the initializer list and the
// declaration of `my_indices_ptr` are not part of this excerpt).
414template<
bool oracle_,
typename Value_,
typename Index_>
418 const Components& tdb_comp,
419 const std::string& attribute,
421 Index_ target_dim_extent,
422 const Dimension& tdb_target_dim,
423 const Dimension& tdb_non_target_dim,
424 tiledb_datatype_t tdb_type,
427 const CacheParameters<Index_>& cache_stats) :
// The extractor keeps the (moved-in) pointer to the index vector for later fetches.
440 my_indices_ptr(std::move(indices_ptr))
// Delegates to the core, passing the stored index vector.
443 const Value_* fetch(Index_ i, Value_* buffer) {
444 return my_core.fetch_indices(i, *my_indices_ptr, buffer);
448 DenseCore<oracle_, Index_> my_core;
// Start of the matrix class template and the bodies of two of its constructors (the
// class header and the constructor signatures are not part of this excerpt).
// Both constructors delegate to initialize(): the first hands over a caller-supplied
// context, the second passes `false` in its place, which makes initialize() build the
// Components from the URI alone.
471template<
typename Value_,
typename Index_>
481 initialize(uri, std::move(ctx), options);
490 initialize(uri,
false, options);
// Opens the TileDB array at `uri`, validates its schema and records the information
// needed for later extraction.
//
// uri:     location of the array.
// ctx:     either a tiledb::Context to open the array with, or any other type
//          (the constructors pass `false`) to open it from the URI alone.
// options: supplies `maximum_cache_size`; other option fields may be read on lines that
//          are not part of this excerpt.
//
// Throws std::runtime_error if the array is not dense, lacks the attribute
// `my_attribute`, or does not have exactly two dimensions.
500 template<
class PossibleContext_>
501 void initialize(
const std::string& uri, PossibleContext_ ctx,
const DenseMatrixOptions& options) {
// Choose at compile time how the Components object is constructed.
507 if constexpr(std::is_same<PossibleContext_, tiledb::Context>::value) {
508 return new DenseMatrix_internal::Components(std::move(ctx), uri);
510 return new DenseMatrix_internal::Components(uri);
// Presumably a custom deleter for the Components pointer; its body is not shown.
513 [](DenseMatrix_internal::Components* ptr) {
521 auto schema = my_tdb_comp->array.schema();
522 if (schema.array_type() != TILEDB_DENSE) {
523 throw std::runtime_error(
"TileDB array should be dense");
526 if (!schema.has_attribute(my_attribute)) {
527 throw std::runtime_error(
"no attribute '" + my_attribute +
"' is present in the TileDB array");
529 auto attr = schema.attribute(my_attribute);
530 my_tdb_type = attr.type();
// Convert the cache budget from `maximum_cache_size` into a number of elements of
// the attribute's type.
532 my_cache_size_in_elements = options.
maximum_cache_size / internal::determine_type_size(my_tdb_type);
535 tiledb::Domain domain = schema.domain();
536 if (domain.ndim() != 2) {
537 throw std::runtime_error(
"TileDB array should have exactly two dimensions");
// Record extent and tile length of the first dimension (reported as the rows).
540 tiledb::Dimension first_dim = domain.dimension(0);
541 my_tdb_first_dim.reset(first_dim);
542 Index_ first_extent = my_tdb_first_dim.extent<Index_>();
543 Index_ first_tile = my_tdb_first_dim.tile<Index_>();
544 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(first_extent, first_tile);
// Same for the second dimension (reported as the columns).
546 tiledb::Dimension second_dim = domain.dimension(1);
547 my_tdb_second_dim.reset(second_dim);
548 Index_ second_extent = my_tdb_second_dim.extent<Index_>();
549 Index_ second_tile = my_tdb_second_dim.tile<Index_>();
550 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(second_extent, second_tile);
// Ceiling divisions: the number of tiles spanned along the other dimension. Access
// along the first dimension is preferred when it spans no more tiles than access
// along the second.
553 auto tiles_per_firstdim = (second_extent / second_tile) + (second_extent % second_tile > 0);
554 auto tiles_per_seconddim = (first_extent / first_tile) + (first_extent % first_tile > 0);
555 my_prefer_firstdim = tiles_per_firstdim <= tiles_per_seconddim;
// Data members of the matrix class.
// Handle to the opened array; initialize() reads the schema through it.
560 std::shared_ptr<DenseMatrix_internal::Components> my_tdb_comp;
// TileDB dimension wrappers for the first and second dimension, set in initialize().
562 DenseMatrix_internal::Dimension my_tdb_first_dim, my_tdb_second_dim;
// Datatype of the attribute `my_attribute`, set in initialize().
563 tiledb_datatype_t my_tdb_type;
565 std::string my_attribute;
// Cache budget expressed in elements of the attribute's type.
566 size_t my_cache_size_in_elements;
// Forwarded to tatami_chunked::SlabCacheStats in populate().
567 bool my_require_minimum_cache;
// NOTE(review): these two offsets are not used in the lines shown in this excerpt.
569 int my_first_offset, my_second_offset;
// Extent and chunking information for each dimension.
570 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
// Whether access along the first dimension is preferred; computed in initialize().
571 bool my_prefer_firstdim;
// Number of rows: the extent of the first TileDB dimension.
574 Index_ nrow_internal()
const {
575 return my_firstdim_stats.dimension_extent;
// Number of columns: the extent of the second TileDB dimension.
578 Index_ ncol_internal()
const {
579 return my_seconddim_stats.dimension_extent;
// Returns the number of rows by forwarding to nrow_internal().
583 Index_ nrow()
const {
584 return nrow_internal();
// Returns the number of columns by forwarding to ncol_internal().
587 Index_ ncol()
const {
588 return ncol_internal();
// NOTE(review): the bodies of is_sparse() and is_sparse_proportion() are not part of
// this excerpt, so their return values cannot be stated from here.
591 bool is_sparse()
const {
595 double is_sparse_proportion()
const {
// Row access is preferred exactly when access along the first dimension is preferred.
599 bool prefer_rows()
const {
600 return my_prefer_firstdim;
// Same preference converted to double (the bool becomes 1.0 or 0.0).
603 double prefer_rows_proportion()
const {
604 return static_cast<double>(my_prefer_firstdim);
// Reports whether an oracle is used: true when the cache budget is non-zero.
// The bool argument is unnamed and ignored, so the answer is the same for either value.
607 bool uses_oracle(
bool)
const {
610 return my_cache_size_in_elements > 0;
// Builds a dense extractor of type Extractor_<oracle_, Value_, Index_>.
//
// row:               true to extract along the first dimension, false along the second.
// non_target_length: number of non-target elements per fetched target element.
// oracle:            oracle (or placeholder when oracle_ is false) for the extractor.
// args:              extra constructor arguments forwarded to the extractor.
//
// NOTE(review): several argument lines of the SlabCacheStats and make_unique calls, and
// the line separating the two groups of cache_params assignments, are not part of
// this excerpt.
614 template<
bool oracle_,
template<
bool,
typename,
typename>
class Extractor_,
typename ... Args_>
615 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate(
bool row, Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> oracle, Args_&& ... args)
const {
// The target dimension is the first one for row access, the second one otherwise.
616 const auto& target_dim_stats = (row ? my_firstdim_stats : my_seconddim_stats);
617 const auto& tdb_target_dim = (row ? my_tdb_first_dim : my_tdb_second_dim);
618 const auto& tdb_non_target_dim = (row ? my_tdb_second_dim : my_tdb_first_dim);
620 tatami_chunked::SlabCacheStats slab_stats(
621 target_dim_stats.chunk_length,
623 target_dim_stats.num_chunks,
624 my_cache_size_in_elements,
625 my_require_minimum_cache
633 DenseMatrix_internal::CacheParameters<Index_> cache_params;
// When at least one slab fits in the cache, use the computed slab statistics.
634 if (slab_stats.max_slabs_in_cache > 0) {
635 cache_params.chunk_length = target_dim_stats.chunk_length;
636 cache_params.slab_size_in_elements = slab_stats.slab_size_in_elements;
637 cache_params.max_slabs_in_cache = slab_stats.max_slabs_in_cache;
// Presumably the else branch (the line introducing it is not shown): fall back to a
// single slab that holds one target element of non_target_length entries.
639 cache_params.chunk_length = 1;
640 cache_params.slab_size_in_elements = non_target_length;
641 cache_params.max_slabs_in_cache = 1;
644 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
648 target_dim_stats.dimension_extent,
653 std::forward<Args_>(args)...,
// Myopic (no oracle) dense extractors. Each overload forwards to populate<false, ...>,
// passing `false` in the oracle position.
// Full extent: the non-target length is the number of columns for row access and the
// number of rows otherwise. The tatami::Options argument is unnamed and unused.
662 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options&)
const {
663 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
664 return populate<false, DenseMatrix_internal::Full>(row, full_non_target,
false, full_non_target);
// Contiguous block of the non-target dimension starting at block_start.
667 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options&)
const {
668 return populate<false, DenseMatrix_internal::Block>(row, block_length,
false, block_start, block_length);
// Indexed subset (the signature of this overload is not part of this excerpt): the
// non-target length is the number of indices.
672 auto nidx = indices_ptr->size();
673 return populate<false, DenseMatrix_internal::Index>(row, nidx,
false, std::move(indices_ptr));
// Myopic (no oracle) sparse extractors. Each overload creates the matching dense
// extractor and wraps it in the corresponding tatami *SparsifiedWrapper.
// Full extent.
680 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
681 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
682 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), full_non_target, opt);
// Contiguous block of the non-target dimension.
685 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
686 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
// Indexed subset (the signature of this overload is not part of this excerpt).
// indices_ptr is passed to dense() as an lvalue first, then moved into the wrapper.
690 auto ptr = dense(row, indices_ptr, opt);
691 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
// Oracular dense extractors. Each overload forwards to populate<true, ...>, moving the
// oracle into it. The parameter lists of these overloads are not part of this excerpt.
// Full extent: the non-target length is the number of columns for row access and the
// number of rows otherwise.
698 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
703 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
704 return populate<true, DenseMatrix_internal::Full>(row, full_non_target, std::move(oracle), full_non_target);
// Contiguous block of the non-target dimension starting at block_start.
707 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
714 return populate<true, DenseMatrix_internal::Block>(row, block_length, std::move(oracle), block_start, block_length);
// Indexed subset: the non-target length is the number of indices.
717 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
723 auto nidx = indices_ptr->size();
724 return populate<true, DenseMatrix_internal::Index>(row, nidx, std::move(oracle), std::move(indices_ptr));
// Oracular sparse extractors. Each overload creates the matching oracular dense
// extractor and wraps it in the corresponding tatami *SparsifiedWrapper. The parameter
// lists of these overloads are not part of this excerpt.
// Full extent.
731 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
736 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
737 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), opt), full_non_target, opt);
// Contiguous block of the non-target dimension.
740 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
747 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), block_start, block_length, opt), block_start, block_length, opt);
// Indexed subset. indices_ptr is passed to dense() as an lvalue first, then moved into
// the wrapper.
750 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
756 auto ptr = dense(row, std::move(oracle), indices_ptr, opt);
757 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);