50namespace DenseMatrix_internal {
52typedef ::tatami_tiledb::internal::Components Components;
53typedef ::tatami_tiledb::internal::VariablyTypedDimension Dimension;
54typedef ::tatami_tiledb::internal::VariablyTypedVector CacheBuffer;
56inline void execute_query(
const Components& tdb_comp,
const tiledb::Subarray& subarray,
const std::string& attribute,
bool row, CacheBuffer& buffer,
size_t offset,
size_t length) {
57 tiledb::Query query(tdb_comp.ctx, tdb_comp.array);
58 query.set_subarray(subarray);
59 query.set_layout(row ? TILEDB_ROW_MAJOR : TILEDB_COL_MAJOR);
60 buffer.set_data_buffer(query, attribute, offset, length);
61 if (query.submit() != tiledb::Query::Status::COMPLETE) {
62 throw std::runtime_error(
"failed to read dense data from TileDB");
/**
 * Parameters describing how slabs are laid out in the cache of a dense extractor.
 *
 * @tparam Index_ Integer type used for row/column indices.
 */
template<typename Index_>
struct CacheParameters {
    // Length of one chunk along the target dimension.
    // This member is read by the core constructors and assigned when the parameters are built.
    Index_ chunk_length;

    // Number of elements in a single slab.
    size_t slab_size_in_elements;

    // Maximum number of slabs that may be held in the cache at once.
    size_t max_slabs_in_cache;
};
// NOTE(review): the class header and the opening of the constructor signature are
// not visible here. This is presumably the constructor of MyopicCore<Index_>
// (it sets up an LRU slab cache) - confirm against the full file.
77template<
typename Index_>
// Constructor parameters; each one is forwarded into a member in the initializer list below.
81 const Components& tdb_comp,
82 const std::string& attribute,
84 Index_ target_dim_extent,
85 const Dimension& tdb_target_dim,
86 const Dimension& tdb_non_target_dim,
87 tiledb_datatype_t tdb_type,
88 Index_ non_target_length,
90 const CacheParameters<Index_>& cache_stats) :
// Member initializer list.
91 my_tdb_comp(tdb_comp),
92 my_attribute(attribute),
94 my_target_dim_extent(target_dim_extent),
95 my_tdb_target_dim(tdb_target_dim),
96 my_tdb_non_target_dim(tdb_non_target_dim),
97 my_non_target_length(non_target_length),
// Chunk length and slab size are taken directly from the cache parameters.
98 my_target_chunk_length(cache_stats.chunk_length),
99 my_slab_size(cache_stats.slab_size_in_elements),
// The holding buffer is constructed with the attribute type and (slab size x maximum number of slabs) elements.
// This relies on my_slab_size being initialized before my_holding.
100 my_holding(tdb_type, my_slab_size * cache_stats.max_slabs_in_cache),
// The cache itself is limited to the same maximum number of slabs.
101 my_cache(cache_stats.max_slabs_in_cache)
// Data members of the myopic core.
// NOTE(review): my_row, my_slab_size and the Slab type are used by this class but
// their declarations are not visible here - confirm against the full file.

// Held by reference, so the referenced components and attribute name must outlive this object.
105 const Components& my_tdb_comp;
106 const std::string& my_attribute;
// Extent of the target dimension, plus references to the handles for both TileDB dimensions.
109 Index_ my_target_dim_extent;
110 const Dimension& my_tdb_target_dim;
111 const Dimension& my_tdb_non_target_dim;
// Number of elements requested along the non-target dimension, and the chunk length along the target dimension.
113 Index_ my_non_target_length;
114 Index_ my_target_chunk_length;
// Single buffer holding the contents of all cached slabs; each slab records an offset into it.
116 CacheBuffer my_holding;
// Offset assigned to the next newly created slab; advanced by my_slab_size on each creation.
121 size_t my_offset = 0;
// Cache of slabs, keyed by chunk identifier.
122 tatami_chunked::LruSlabCache<Index_, Slab> my_cache;
// Fetch target element `i` into `buffer`, loading the enclosing chunk into the cache if necessary.
// `configure` is called as configure(subarray, rowdex) so that the caller can add the
// ranges for the non-target dimension to the query.
// NOTE(review): several lines of this function (the first argument and creation lambda of
// my_cache.find(), the definition of `rowdex`, closing delimiters and the return statement)
// are not visible here - confirm against the full file.
125 template<
typename Value_,
class Configure_>
126 const Value_* fetch_raw(Index_ i, Value_* buffer, Configure_ configure) {
// Split the requested index into the chunk that contains it and the position within that chunk.
127 Index_ chunk = i / my_target_chunk_length;
128 Index_ index = i % my_target_chunk_length;
130 const auto& info = my_cache.find(
// A new slab is given the next free offset in the holding buffer.
134 output.offset = my_offset;
135 my_offset += my_slab_size;
// Population callback: read the chunk identified by `id` from TileDB into the slab `contents`.
138 [&](Index_ id, Slab& contents) ->
void {
139 Index_ target_start =
id * my_target_chunk_length;
// The last chunk may be shorter than the nominal chunk length.
140 Index_ target_length = std::min(my_target_dim_extent - target_start, my_target_chunk_length);
143 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
// Non-target ranges are added by the caller on dimension `rowdex`; the target range goes on the other dimension.
145 configure(subarray, rowdex);
146 my_tdb_target_dim.add_range(subarray, 1 - rowdex, target_start, target_length);
147 execute_query(my_tdb_comp, subarray, my_attribute, my_row, my_holding, contents.offset, my_slab_size);
// Position of the requested element's data within the holding buffer: slab offset plus
// `index` strides of my_non_target_length, computed in size_t.
152 size_t final_offset = info.offset +
static_cast<size_t>(my_non_target_length) *
static_cast<size_t>(index);
153 my_holding.copy(final_offset, my_non_target_length, buffer);
158 template<
typename Value_>
159 const Value_* fetch_block(Index_ i, Index_ block_start, Value_* buffer) {
163 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
164 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, my_non_target_length);
// Fetch an indexed subset of the non-target dimension for target element `i`.
// NOTE(review): the call that receives the outer lambda and the call that produces the
// (s, l) pairs from `indices` are not visible here - confirm against the full file.
169 template<
typename Value_>
170 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
// Configuration callback taking the subarray and the dimension index for the non-target ranges.
174 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
// Inner callback: each (s, l) pair is added as one range on the non-target dimension
// (presumably a start and a length - confirm).
178 [&](Index_ s, Index_ l) ->
void {
179 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
// NOTE(review): the class header and the opening of the constructor signature are
// not visible here. This is presumably the constructor of OracularCore<Index_>
// (it sets up an oracular slab cache) - confirm against the full file.
187template<
typename Index_>
// Constructor parameters; each one is forwarded into a member in the initializer list below.
191 const Components& tdb_comp,
192 const std::string& attribute,
194 Index_ target_dim_extent,
195 const Dimension& tdb_target_dim,
196 const Dimension& tdb_non_target_dim,
197 tiledb_datatype_t tdb_type,
198 Index_ non_target_length,
200 const CacheParameters<Index_>& cache_stats) :
// Member initializer list.
201 my_tdb_comp(tdb_comp),
202 my_attribute(attribute),
204 my_target_dim_extent(target_dim_extent),
205 my_tdb_target_dim(tdb_target_dim),
206 my_tdb_non_target_dim(tdb_non_target_dim),
207 my_non_target_length(non_target_length),
// Chunk length and slab size are taken directly from the cache parameters.
208 my_target_chunk_length(cache_stats.chunk_length),
209 my_slab_size(cache_stats.slab_size_in_elements),
// The holding buffer is constructed with the attribute type and (slab size x maximum number of slabs) elements.
// This relies on my_slab_size being initialized before my_holding.
210 my_holding(tdb_type, my_slab_size * cache_stats.max_slabs_in_cache),
// The cache takes ownership of the oracle.
// NOTE(review): the `oracle` parameter itself is not visible in the signature above.
211 my_cache(std::move(oracle), cache_stats.max_slabs_in_cache)
// Data members of the oracular core.
// NOTE(review): my_row, my_slab_size and the Slab type are used by this class but
// their declarations are not visible here - confirm against the full file.

// Held by reference, so the referenced components and attribute name must outlive this object.
215 const Components& my_tdb_comp;
216 const std::string& my_attribute;
// Extent of the target dimension, plus references to the handles for both TileDB dimensions.
219 Index_ my_target_dim_extent;
220 const Dimension& my_tdb_target_dim;
221 const Dimension& my_tdb_non_target_dim;
// Number of elements requested along the non-target dimension, and the chunk length along the target dimension.
223 Index_ my_non_target_length;
224 Index_ my_target_chunk_length;
// Single buffer holding the contents of all cached slabs; each slab records an offset into it.
226 CacheBuffer my_holding;
// Offset assigned to the next newly created slab; advanced by my_slab_size on each creation.
231 size_t my_offset = 0;
// Oracle-driven cache of slabs, keyed by chunk identifier.
232 tatami_chunked::OracularSlabCache<Index_, Index_, Slab, true> my_cache;
235 template<
class Function_>
236 static void sort_by_field(std::vector<std::pair<Index_, Slab*> >& indices, Function_ field) {
237 auto comp = [&field](
const std::pair<Index_, Slab*>& l,
const std::pair<Index_, Slab*>& r) ->
bool {
238 return field(l) < field(r);
240 if (!std::is_sorted(indices.begin(), indices.end(), comp)) {
241 std::sort(indices.begin(), indices.end(), comp);
// Fetch the next target element predicted by the oracle into `buffer`.
// `i` is unused; the element to fetch comes from my_cache.next().
// `configure` is called as configure(subarray, rowdex) so that the caller can add the
// ranges for the non-target dimension to the query.
// NOTE(review): several lines of this function (lambda heads, the definition of `rowdex`,
// closing delimiters and the return statement) are not visible here - confirm against the full file.
245 template<
typename Value_,
class Configure_>
246 const Value_* fetch_raw([[maybe_unused]] Index_ i, Value_* buffer, Configure_ configure) {
247 auto info = my_cache.next(
// Identification callback: map a target index to (chunk id, position within the chunk).
248 [&](Index_ current) -> std::pair<Index_, Index_> {
249 return std::pair<Index_, Index_>(current / my_target_chunk_length, current % my_target_chunk_length);
// A new slab is given the next free offset in the holding buffer.
253 output.offset = my_offset;
254 my_offset += my_slab_size;
// Population callback: `to_reuse` holds slabs that are kept, `to_populate` holds slabs to be filled.
257 [&](std::vector<std::pair<Index_, Slab*> >& to_populate, std::vector<std::pair<Index_, Slab*> >& to_reuse) ->
void {
// Step 1: process the reused slabs in increasing offset order and reassign them consecutive
// offsets starting at zero, so that the space after them forms one contiguous region.
260 sort_by_field(to_reuse, [](
const std::pair<Index_, Slab*>& x) ->
size_t {
return x.second->offset; });
261 size_t running_offset = 0;
262 for (
auto& x : to_reuse) {
263 auto& cur_offset = x.second->offset;
264 if (cur_offset != running_offset) {
// presumably moves my_slab_size elements from cur_offset to running_offset - confirm shift() semantics.
265 my_holding.shift(cur_offset, my_slab_size, running_offset);
266 cur_offset = running_offset;
268 running_offset += my_slab_size;
// Step 2: order the slabs to be populated by chunk id so that neighbouring chunks can share a range.
276 sort_by_field(to_populate, [](
const std::pair<Index_, Slab*>& x) -> Index_ {
return x.first; });
279 tiledb::Subarray subarray(my_tdb_comp.ctx, my_tdb_comp.array);
281 configure(subarray, rowdex);
// Start the first run at the first chunk to be populated; the last chunk may be shorter than the nominal length.
288 Index_ run_chunk_id = to_populate.front().first;
289 Index_ run_chunk_start = run_chunk_id * my_target_chunk_length;
290 Index_ run_length = std::min(my_target_dim_extent - run_chunk_start, my_target_chunk_length);
// New slabs are placed directly after the reused ones; start_offset marks where the query output begins.
292 to_populate.front().second->offset = running_offset;
293 auto start_offset = running_offset;
294 running_offset += my_slab_size;
// The target dimension is the one not used by `configure`.
296 int dimdex = 1 - rowdex;
297 for (
size_t ci = 1, cend = to_populate.size(); ci < cend; ++ci) {
298 auto& current_chunk = to_populate[ci];
299 Index_ current_chunk_id = current_chunk.first;
300 Index_ current_chunk_start = current_chunk_id * my_target_chunk_length;
// Flush the current run as one range and start a new run at this chunk.
// NOTE(review): run_chunk_id is only updated when a run starts, so this compares against the
// first chunk of the run; a third consecutive chunk therefore starts a new run - confirm intended.
// NOTE(review): no reset of run_length is visible after the flush - confirm against the full file.
302 if (current_chunk_id - run_chunk_id > 1) {
303 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
304 run_chunk_id = current_chunk_id;
305 run_chunk_start = current_chunk_start;
// Extend the run by this chunk and assign its slab the next offset.
309 Index_ current_length = std::min(my_target_dim_extent - current_chunk_start, my_target_chunk_length);
310 run_length += current_length;
311 current_chunk.second->offset = running_offset;
312 running_offset += my_slab_size;
// Flush the final run, then read all new slabs with a single query into [start_offset, running_offset).
315 my_tdb_target_dim.add_range(subarray, dimdex, run_chunk_start, run_length);
316 execute_query(my_tdb_comp, subarray, my_attribute, my_row, my_holding, start_offset, running_offset - start_offset);
// Position of the requested element's data: slab offset plus (position within chunk) strides of my_non_target_length.
// NOTE(review): unlike the myopic counterpart, my_non_target_length is not explicitly cast to size_t here.
321 size_t final_offset = info.first->offset + my_non_target_length *
static_cast<size_t>(info.second);
322 my_holding.copy(final_offset, my_non_target_length, buffer);
327 template<
typename Value_>
328 const Value_* fetch_block(Index_ i, Index_ block_start, Value_* buffer) {
332 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
333 my_tdb_non_target_dim.add_range(subarray, rowdex, block_start, my_non_target_length);
// Fetch an indexed subset of the non-target dimension for the next target element.
// NOTE(review): the call that receives the outer lambda and the call that produces the
// (s, l) pairs from `indices` are not visible here - confirm against the full file.
338 template<
typename Value_>
339 const Value_* fetch_indices(Index_ i,
const std::vector<Index_>& indices, Value_* buffer) {
// Configuration callback taking the subarray and the dimension index for the non-target ranges.
343 [&](tiledb::Subarray& subarray,
int rowdex) ->
void {
// Inner callback: each (s, l) pair is added as one range on the non-target dimension
// (presumably a start and a length - confirm).
347 [&](Index_ s, Index_ l) ->
void {
348 my_tdb_non_target_dim.add_range(subarray, rowdex, s, l);
356template<
bool oracle_,
typename Index_>
357using DenseCore =
typename std::conditional<oracle_, OracularCore<Index_>, MyopicCore<Index_> >::type;
// Extractor fragment that fetches the full extent of the non-target dimension.
// NOTE(review): the class header, part of the constructor signature and its initializer list
// are not visible here; this is presumably DenseMatrix_internal::Full - confirm against the full file.
363template<
bool oracle_,
typename Value_,
typename Index_>
// Constructor parameters.
367 const Components& tdb_comp,
368 const std::string& attribute,
370 Index_ target_dim_extent,
371 const Dimension& tdb_target_dim,
372 const Dimension& tdb_non_target_dim,
373 tiledb_datatype_t tdb_type,
375 Index_ non_target_dim,
376 const CacheParameters<Index_>& cache_stats) :
// Fetch target element `i`; the block always starts at zero on the non-target dimension.
391 const Value_* fetch(Index_ i, Value_* buffer) {
392 return my_core.fetch_block(i, 0, buffer);
// Core that performs the caching and TileDB queries; its type depends on `oracle_`.
396 DenseCore<oracle_, Index_> my_core;
// Extractor fragment that fetches a contiguous block of the non-target dimension.
// NOTE(review): the class header, part of the constructor signature (including `block_start`)
// and most of its initializer list are not visible here; this is presumably
// DenseMatrix_internal::Block - confirm against the full file.
399template<
bool oracle_,
typename Value_,
typename Index_>
// Constructor parameters.
403 const Components& tdb_comp,
404 const std::string& attribute,
406 Index_ target_dim_extent,
407 const Dimension& tdb_target_dim,
408 const Dimension& tdb_non_target_dim,
409 tiledb_datatype_t tdb_type,
413 const CacheParameters<Index_>& cache_stats) :
// The start of the block is remembered for use in every fetch() call.
426 my_block_start(block_start)
// Fetch target element `i`, restricted to the block starting at my_block_start.
429 const Value_* fetch(Index_ i, Value_* buffer) {
430 return my_core.fetch_block(i, my_block_start, buffer);
// Core that performs the caching and TileDB queries; its type depends on `oracle_`.
434 DenseCore<oracle_, Index_> my_core;
435 Index_ my_block_start;
// Extractor fragment that fetches an indexed subset of the non-target dimension.
// NOTE(review): the class header, part of the constructor signature (including `indices_ptr`),
// most of its initializer list and the declaration of my_indices_ptr are not visible here;
// this is presumably DenseMatrix_internal::Index - confirm against the full file.
438template<
bool oracle_,
typename Value_,
typename Index_>
// Constructor parameters.
442 const Components& tdb_comp,
443 const std::string& attribute,
445 Index_ target_dim_extent,
446 const Dimension& tdb_target_dim,
447 const Dimension& tdb_non_target_dim,
448 tiledb_datatype_t tdb_type,
451 const CacheParameters<Index_>& cache_stats) :
// The pointer to the indices is moved into the extractor.
464 my_indices_ptr(std::move(indices_ptr))
// Fetch target element `i`, restricted to the stored indices.
467 const Value_* fetch(Index_ i, Value_* buffer) {
468 return my_core.fetch_indices(i, *my_indices_ptr, buffer);
// Core that performs the caching and TileDB queries; its type depends on `oracle_`.
472 DenseCore<oracle_, Index_> my_core;
// NOTE(review): the class header and constructor signatures are not visible here; the two
// statements below are presumably the bodies of two constructors - confirm against the full file.
495template<
typename Value_,
typename Index_>
// Variant that hands a caller-supplied context to initialize().
505 initialize(uri, std::move(ctx), options);
// Variant without a context: `false` is passed in its place, so initialize() takes its
// non-tiledb::Context branch.
514 initialize(uri,
false, options);
// Open the TileDB array at `uri`, validate its schema and record the dimension statistics.
// `ctx` is either a tiledb::Context or a placeholder of another type (see the `if constexpr` below).
// Throws std::runtime_error if the array is not dense, lacks the requested attribute,
// or does not have exactly two dimensions.
// NOTE(review): several lines (the assignment of my_attribute and other option-derived members,
// the statement that stores the Components pointer, closing delimiters) are not visible here -
// confirm against the full file.
524 template<
class PossibleContext_>
525 void initialize(
const std::string& uri, PossibleContext_ ctx,
const DenseMatrixOptions& options) {
// Construct the components with the supplied context if one was given, otherwise from the URI alone.
531 if constexpr(std::is_same<PossibleContext_, tiledb::Context>::value) {
532 return new DenseMatrix_internal::Components(std::move(ctx), uri);
534 return new DenseMatrix_internal::Components(uri);
// Custom deleter for the Components pointer (body not visible).
537 [](DenseMatrix_internal::Components* ptr) ->
void {
// Schema validation: the array must be dense and must contain the requested attribute.
545 auto schema = my_tdb_comp->array.schema();
546 if (schema.array_type() != TILEDB_DENSE) {
547 throw std::runtime_error(
"TileDB array should be dense");
550 if (!schema.has_attribute(my_attribute)) {
551 throw std::runtime_error(
"no attribute '" + my_attribute +
"' is present in the TileDB array");
553 auto attr = schema.attribute(my_attribute);
554 my_tdb_type = attr.type();
// Convert the maximum cache size from the options into a number of elements of the attribute's type.
556 my_cache_size_in_elements = options.
maximum_cache_size / internal::determine_type_size(my_tdb_type);
559 tiledb::Domain domain = schema.domain();
560 if (domain.ndim() != 2) {
561 throw std::runtime_error(
"TileDB array should have exactly two dimensions");
// First dimension: record its extent and tile length.
564 tiledb::Dimension first_dim = domain.dimension(0);
565 my_tdb_first_dim.reset(first_dim);
566 Index_ first_extent = my_tdb_first_dim.extent<Index_>();
567 Index_ first_tile = my_tdb_first_dim.tile<Index_>();
568 my_firstdim_stats = tatami_chunked::ChunkDimensionStats<Index_>(first_extent, first_tile);
// Second dimension: same as above.
570 tiledb::Dimension second_dim = domain.dimension(1);
571 my_tdb_second_dim.reset(second_dim);
572 Index_ second_extent = my_tdb_second_dim.extent<Index_>();
573 Index_ second_tile = my_tdb_second_dim.tile<Index_>();
574 my_seconddim_stats = tatami_chunked::ChunkDimensionStats<Index_>(second_extent, second_tile);
// Ceiling divisions: the number of tiles spanned along the second dimension and along the
// first dimension, respectively.
577 auto tiles_per_firstdim = (second_extent / second_tile) + (second_extent % second_tile > 0);
578 auto tiles_per_seconddim = (first_extent / first_tile) + (first_extent % first_tile > 0);
// Prefer access along the first dimension when that spans no more tiles than the alternative.
579 my_prefer_firstdim = tiles_per_firstdim <= tiles_per_seconddim;
// Shared ownership of the TileDB components (context and array).
584 std::shared_ptr<DenseMatrix_internal::Components> my_tdb_comp;
// Handles for the two TileDB dimensions and the data type of the attribute.
586 DenseMatrix_internal::Dimension my_tdb_first_dim, my_tdb_second_dim;
587 tiledb_datatype_t my_tdb_type;
// Name of the attribute to read, and the cache size expressed in elements of that attribute's type.
589 std::string my_attribute;
590 size_t my_cache_size_in_elements;
// Forwarded to tatami_chunked::SlabCacheStats when an extractor is created.
591 bool my_require_minimum_cache;
// NOTE(review): these offsets are not used in the visible code - confirm their purpose.
593 int my_first_offset, my_second_offset;
// Extent and chunking statistics for each dimension.
594 tatami_chunked::ChunkDimensionStats<Index_> my_firstdim_stats, my_seconddim_stats;
// Whether access along the first dimension is preferred; reported by prefer_rows().
595 bool my_prefer_firstdim;
598 Index_ nrow_internal()
const {
599 return my_firstdim_stats.dimension_extent;
602 Index_ ncol_internal()
const {
603 return my_seconddim_stats.dimension_extent;
607 Index_ nrow()
const {
608 return nrow_internal();
611 Index_ ncol()
const {
612 return ncol_internal();
// Reports whether the matrix is sparse.
// NOTE(review): the body is not visible here; presumably this returns false for a dense
// TileDB array - confirm against the full file.
615 bool is_sparse()
const {
// Reports the proportion of the matrix that is sparse.
// NOTE(review): the body is not visible here - confirm the returned value against the full file.
619 double is_sparse_proportion()
const {
623 bool prefer_rows()
const {
624 return my_prefer_firstdim;
627 double prefer_rows_proportion()
const {
628 return static_cast<double>(my_prefer_firstdim);
// Reports whether an oracle is used; the boolean argument is ignored.
// NOTE(review): lines between the signature and the return statement are not visible here -
// confirm against the full file.
631 bool uses_oracle(
bool)
const {
// An oracle is reported as used whenever the cache can hold at least one element.
634 return my_cache_size_in_elements > 0;
// Build a dense extractor of type Extractor_<oracle_, Value_, Index_>.
//   row               - whether rows (true) or columns (false) form the target dimension.
//   non_target_length - number of elements extracted along the non-target dimension.
//   oracle            - oracle for the oracular case, or a placeholder otherwise.
//   args              - extra arguments forwarded to the extractor's constructor.
// NOTE(review): some arguments of the SlabCacheStats constructor and of the make_unique call,
// the `else` keyword and closing delimiters are not visible here - confirm against the full file.
638 template<
bool oracle_,
template<
bool,
typename,
typename>
class Extractor_,
typename ... Args_>
639 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate(
bool row, Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> oracle, Args_&& ... args)
const {
// Select the statistics and dimension handles according to the direction of iteration.
640 const auto& target_dim_stats = (row ? my_firstdim_stats : my_seconddim_stats);
641 const auto& tdb_target_dim = (row ? my_tdb_first_dim : my_tdb_second_dim);
642 const auto& tdb_non_target_dim = (row ? my_tdb_second_dim : my_tdb_first_dim);
// Work out the slab size and how many slabs fit within the cache.
644 tatami_chunked::SlabCacheStats slab_stats(
645 target_dim_stats.chunk_length,
647 target_dim_stats.num_chunks,
648 my_cache_size_in_elements,
649 my_require_minimum_cache
657 DenseMatrix_internal::CacheParameters<Index_> cache_params;
// If at least one slab fits, cache whole chunks along the target dimension.
658 if (slab_stats.max_slabs_in_cache > 0) {
659 cache_params.chunk_length = target_dim_stats.chunk_length;
660 cache_params.slab_size_in_elements = slab_stats.slab_size_in_elements;
661 cache_params.max_slabs_in_cache = slab_stats.max_slabs_in_cache;
// Otherwise use a single slab holding one target element (presumably the else branch - confirm).
663 cache_params.chunk_length = 1;
664 cache_params.slab_size_in_elements = non_target_length;
665 cache_params.max_slabs_in_cache = 1;
668 return std::make_unique<Extractor_<oracle_, Value_, Index_> >(
672 target_dim_stats.dimension_extent,
677 std::forward<Args_>(args)...,
686 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options&)
const {
687 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
688 return populate<false, DenseMatrix_internal::Full>(row, full_non_target,
false, full_non_target);
691 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options&)
const {
692 return populate<false, DenseMatrix_internal::Block>(row, block_length,
false, block_start, block_length);
// Body fragment of what is presumably the myopic dense() overload for an indexed subset.
// NOTE(review): the signature is not visible here - confirm against the full file.
// The number of indices is used as the non-target length; `false` takes the place of the oracle.
696 auto nidx = indices_ptr->size();
697 return populate<false, DenseMatrix_internal::Index>(row, nidx,
false, std::move(indices_ptr));
704 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
705 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
706 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), full_non_target, opt);
709 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
710 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
// Body fragment of what is presumably the myopic sparse() overload for an indexed subset.
// NOTE(review): the signature is not visible here - confirm against the full file.
// The dense extractor is created first, from a copy of indices_ptr, so that the pointer
// can still be moved into the wrapper afterwards.
714 auto ptr = dense(row, indices_ptr, opt);
715 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);
// Oracular dense extractor covering the full extent of the non-target dimension.
// NOTE(review): the parameter list is not visible here - confirm against the full file.
722 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
// The non-target dimension is the one that is not being iterated over.
727 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
// The oracle is moved into populate().
728 return populate<true, DenseMatrix_internal::Full>(row, full_non_target, std::move(oracle), full_non_target);
// Oracular dense extractor for a contiguous block of the non-target dimension.
// NOTE(review): the parameter list is not visible here - confirm against the full file.
731 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
// The block length is used as the non-target length; the oracle is moved into populate().
738 return populate<true, DenseMatrix_internal::Block>(row, block_length, std::move(oracle), block_start, block_length);
// Oracular dense extractor for an indexed subset of the non-target dimension.
// NOTE(review): the parameter list is not visible here - confirm against the full file.
741 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
// The number of indices is used as the non-target length; oracle and indices are moved into populate().
747 auto nidx = indices_ptr->size();
748 return populate<true, DenseMatrix_internal::Index>(row, nidx, std::move(oracle), std::move(indices_ptr));
// Oracular sparse extractor covering the full extent of the non-target dimension.
// NOTE(review): the parameter list is not visible here - confirm against the full file.
755 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
// The non-target dimension is the one that is not being iterated over.
760 Index_ full_non_target = (row ? ncol_internal() : nrow_internal());
// Wrap the oracular dense extractor so that it reports its output in sparse form.
761 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), opt), full_non_target, opt);
// Oracular sparse extractor for a contiguous block of the non-target dimension.
// NOTE(review): the parameter list is not visible here - confirm against the full file.
764 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
// Wrap the oracular dense block extractor so that it reports its output in sparse form.
771 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(oracle), block_start, block_length, opt), block_start, block_length, opt);
// Oracular sparse extractor for an indexed subset of the non-target dimension.
// NOTE(review): the parameter list is not visible here - confirm against the full file.
774 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
// The dense extractor is created first, from a copy of indices_ptr, so that the pointer
// can still be moved into the wrapper afterwards.
780 auto ptr = dense(row, std::move(oracle), indices_ptr, opt);
781 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(std::move(ptr), std::move(indices_ptr), opt);