// Constructor initializer list and body (the signature line lies outside this excerpt).
//
// The initializer list takes ownership of the Python seed, imports the `delayedarray`
// module, caches its `extract_dense_array` / `extract_sparse_array` attributes and
// copies the cache settings out of `opt`. The body then queries the seed's shape,
// sparsity and chunk grid, and derives per-dimension chunk maps and tick vectors.
//
// Throws std::runtime_error if the chunk grid's `boundaries` is not a 2-tuple, if the
// boundaries are not strictly increasing, or if the last boundary does not equal the extent.
//
// Fix: the error paths used to call get_class_name(seed), but `seed` has already been
// moved into my_seed by the first initializer below, leaving it in a moved-from state.
// They now query my_seed, the same object that get_shape() and the delayedarray calls use.
66 my_seed(std::move(seed)),
67 my_module(pybind11::module::import(
"delayedarray")),
68 my_dense_extractor(my_module.attr(
"extract_dense_array")),
69 my_sparse_extractor(my_module.attr(
"extract_sparse_array")),
70 my_cache_size_in_bytes(opt.maximum_cache_size),
71 my_require_minimum_cache(opt.require_minimum_cache)
// Dimensions come from the (first, second) pair returned by get_shape().
77 const auto shape = get_shape<Index_>(my_seed);
78 my_nrow = shape.first;
79 my_ncol = shape.second;
// Ask delayedarray whether the seed is sparse.
85 auto sparse = my_module.attr(
"is_sparse")(my_seed);
86 my_sparse = sparse.template cast<bool>();
// Fetch the chunk grid; its `boundaries` attribute must be a tuple with one entry per dimension.
88 auto grid = my_module.attr(
"chunk_grid")(my_seed);
89 auto bounds = grid.attr(
"boundaries").template cast<pybind11::tuple>();
90 if (bounds.size() != 2) {
// Use my_seed here: `seed` was moved from in the initializer list.
91 auto ctype = get_class_name(my_seed);
92 throw std::runtime_error(
"'chunk_grid(<" + ctype +
">).boundaries' should be a tuple of length 2");
// numpy.array is used below to coerce each boundary sequence to an Index_-typed array.
95 auto np = pybind11::module::import(
"numpy");
96 auto arrayfun = np.attr(
"array");
// populate(): validates the boundaries of one dimension and fills that dimension's outputs.
//   extent         - length of the dimension; the last tick must equal it.
//   raw_ticks      - Python object holding the chunk boundaries.
//   map            - filled over [previous, latest) with `counter` for each chunk
//                    (`counter` and the sizing of `map` are on lines not visible here).
//   new_ticks      - receives 0 followed by every validated boundary.
//   max_chunk_size - raised to the largest boundary gap encountered.
97 auto populate = [&](Index_ extent,
const pybind11::object& raw_ticks, std::vector<Index_>& map, std::vector<Index_>& new_ticks, Index_& max_chunk_size) {
// Convert to a NumPy array of Index_ so the ticks can be read through a raw pointer.
100 auto tick_array = arrayfun(raw_ticks, pybind11::dtype::of<Index_>());
101 auto ticks = tick_array.template cast<pybind11::array_t<Index_> >();
102 const auto tptr =
static_cast<Index_*
>(ticks.request().ptr);
103 const auto nticks = ticks.size();
// One extra slot for the leading zero; sanisizer::sum computes nticks + 1 in the vector's size type.
105 new_ticks.reserve(sanisizer::sum<
decltype(new_ticks.size())>(nticks, 1));
106 new_ticks.push_back(0);
111 for (I<
decltype(nticks)> i = 0; i < nticks; ++i) {
112 const auto latest = tptr[i];
113 const auto previous = new_ticks.back();
// Each boundary must exceed the one before it (the first must exceed the leading 0).
114 if (latest <= previous) {
// Use my_seed here: `seed` was moved from in the initializer list.
115 auto ctype = get_class_name(my_seed);
116 throw std::runtime_error(
"boundaries are not strictly increasing in the output of 'chunk_grid(<" + ctype +
">).boundaries'");
118 new_ticks.push_back(latest);
// Label every position inside this chunk with the current counter value.
120 std::fill(map.begin() + previous, map.begin() + latest, counter);
// Track the widest chunk seen so far.
122 const auto to_fill = latest - previous;
123 if (to_fill > max_chunk_size) {
124 max_chunk_size = to_fill;
// The final boundary has to coincide with the dimension extent.
128 if (!sanisizer::is_equal(new_ticks.back(), extent)) {
// Use my_seed here: `seed` was moved from in the initializer list.
129 auto ctype = get_class_name(my_seed);
130 throw std::runtime_error(
"invalid ticks returned in 'chunk_grid(<" + ctype +
">).boundaries'");
// bounds[0] describes the rows, bounds[1] the columns.
134 populate(my_nrow, bounds[0], my_row_chunk_map, my_row_chunk_ticks, my_row_max_chunk_size);
135 populate(my_ncol, bounds[1], my_col_chunk_map, my_col_chunk_ticks, my_col_max_chunk_size);
// A row crosses one chunk per column-chunk and vice versa; prefer rows when that count is no larger.
138 auto chunks_per_row = my_col_chunk_ticks.size() - 1;
139 auto chunks_per_col = my_row_chunk_ticks.size() - 1;
140 my_prefer_rows = chunks_per_row <= chunks_per_col;
// Matrix dimensions; assigned in the constructor from the pair returned by get_shape<Index_>(my_seed).
144 Index_ my_nrow, my_ncol;
// my_sparse: result of delayedarray's `is_sparse(my_seed)`, cast to bool.
// my_prefer_rows: true when the number of column chunks is <= the number of row chunks.
145 bool my_sparse, my_prefer_rows;
// Per-dimension chunk maps; the constructor's `populate` helper fills each chunk's range with a counter value.
147 std::vector<Index_> my_row_chunk_map, my_col_chunk_map;
// Per-dimension chunk boundaries: a leading 0 followed by the strictly increasing ticks,
// the last of which is checked to equal the dimension extent.
148 std::vector<Index_> my_row_chunk_ticks, my_col_chunk_ticks;
// Raised by `populate` to the largest gap between consecutive ticks
// (the initial value is set on a line not visible in this excerpt).
158 Index_ my_row_max_chunk_size, my_col_max_chunk_size;
// Python object backing the matrix; move-constructed from the constructor's `seed`.
160 pybind11::object my_seed;
// The imported `delayedarray` Python module.
161 pybind11::module my_module;
// The `extract_dense_array` and `extract_sparse_array` attributes of my_module.
162 pybind11::object my_dense_extractor, my_sparse_extractor;
// Copied from opt.maximum_cache_size.
164 std::size_t my_cache_size_in_bytes;
// Copied from opt.require_minimum_cache.
165 bool my_require_minimum_cache;
// Const accessor returning an Index_. The body is not visible in this excerpt
// (presumably it returns my_nrow — TODO confirm).
168 Index_ nrow()
const {
// Const accessor returning an Index_. The body is not visible in this excerpt
// (presumably it returns my_ncol — TODO confirm).
172 Index_ ncol()
const {
// Const accessor returning a bool. The body is not visible in this excerpt
// (presumably it returns my_sparse — TODO confirm).
176 bool is_sparse()
const {
// Returns the sparsity flag as a double: 1.0 when my_sparse is true, 0.0 otherwise.
180 double is_sparse_proportion()
const {
181 return static_cast<double>(my_sparse);
// Returns the row-preference flag computed by the constructor from the chunk counts.
184 bool prefer_rows()
const {
185 return my_prefer_rows;
// Returns the row-preference flag as a double: 1.0 when my_prefer_rows is true, 0.0 otherwise.
188 double prefer_rows_proportion()
const {
189 return static_cast<double>(my_prefer_rows);
// Takes an unnamed (and therefore unused) bool argument and returns a bool.
// The body is not visible in this excerpt, so the returned value cannot be stated here.
192 bool uses_oracle(
bool)
const {
// Returns the largest chunk length along the requested dimension:
// the row value when `row` is true, the column value otherwise.
197 Index_ max_primary_chunk_length(
bool row)
const {
198 return (row ? my_row_max_chunk_size : my_col_max_chunk_size);
// Estimates how many chunks of length `primary_chunk_length` fit along the primary
// dimension (rows when `row` is true, columns otherwise).
// A zero chunk length is special-cased to avoid dividing by zero; that branch's
// return value is on a line not visible in this excerpt.
201 Index_ primary_num_chunks(
bool row, Index_ primary_chunk_length)
const {
202 auto primary_dim = (row ? my_nrow : my_ncol);
203 if (primary_chunk_length == 0) {
// NOTE(review): integer division truncates, so a trailing partial chunk is not counted —
// confirm that rounding down (rather than up) is what the cache statistics expect.
206 return primary_dim / primary_chunk_length;
// Returns the extent of the other dimension: the column count when `row` is true,
// the row count otherwise.
210 Index_ secondary_dim(
bool row)
const {
211 return (row ? my_ncol : my_nrow);
// Returns a const reference to either the row or the column tick vector.
// The selecting condition is not visible in this excerpt (presumably `row` — TODO confirm).
214 const std::vector<Index_>& chunk_ticks(
bool row)
const {
216 return my_row_chunk_ticks;
218 return my_col_chunk_ticks;
// Returns a const reference to either the row or the column chunk map.
// The selecting condition is not visible in this excerpt (presumably `row` — TODO confirm).
222 const std::vector<Index_>& chunk_map(
bool row)
const {
224 return my_row_chunk_map;
226 return my_col_chunk_map;
// Shared factory behind the populate_dense() overloads.
// FromDense_ and FromSparse_ are template-template parameters naming the extractor class
// templates to instantiate; both are instantiated below with a leading `true` and a leading
// `false` argument. The conditions choosing between the four `new` expressions are on lines
// not visible in this excerpt (presumably my_sparse and `solo` — TODO confirm).
// Returns a unique_ptr to a tatami::DenseExtractor.
236 template <
bool,
bool,
typename,
typename,
typename>
class FromDense_,
237 template <
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
240 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(
242 Index_ non_target_length,
// Cache statistics are derived from the largest chunk along the target dimension,
// the estimated chunk count, the byte budget and the size of one cached value.
246 Index_ max_target_chunk_length = max_primary_chunk_length(row);
247 tatami_chunked::SlabCacheStats<Index_> stats(
248 max_target_chunk_length,
250 primary_num_chunks(row, max_target_chunk_length),
251 my_cache_size_in_bytes,
252 sizeof(CachedValue_),
253 my_require_minimum_cache
// Chunk map and ticks for the target dimension.
256 const auto& map = chunk_map(row);
257 const auto& ticks = chunk_ticks(row);
// `solo` is set when the cache cannot hold even a single slab.
258 const bool solo = (stats.max_slabs_in_cache == 0);
260 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;
261#ifdef TATAMI_PYTHON_PARALLELIZE_UNKNOWN
268 new FromDense_<true, oracle_, Value_, Index_, CachedValue_>(
273 std::forward<Args_>(args)...,
282 new FromDense_<false, oracle_, Value_, Index_, CachedValue_>(
287 std::forward<Args_>(args)...,
298 new FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
303 std::forward<Args_>(args)...,
304 max_target_chunk_length,
313 new FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
318 std::forward<Args_>(args)...,
319 max_target_chunk_length,
328#ifdef TATAMI_PYTHON_PARALLELIZE_UNKNOWN
// Full-extent dense factory: uses the whole secondary dimension as the non-target
// extent and dispatches to populate_dense_internal with the DenseFull /
// DensifiedSparseFull extractor templates. The parameter list is not visible in this excerpt.
335 template<
bool oracle_>
336 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(
341 Index_ non_target_dim = secondary_dim(row);
342 return populate_dense_internal<oracle_, DenseFull, DensifiedSparseFull>(
// Block dense factory: dispatches to populate_dense_internal with the DenseBlock /
// DensifiedSparseBlock extractor templates. The parameter list and call arguments
// are not visible in this excerpt.
350 template<
bool oracle_>
351 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(
358 return populate_dense_internal<oracle_, DenseBlock, DensifiedSparseBlock>(
// Indexed dense factory: dispatches to populate_dense_internal with the DenseIndexed /
// DensifiedSparseIndexed extractor templates and hands over ownership of the index vector.
367 template<
bool oracle_>
368 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(
// Read the index count before indices_ptr is moved away below.
374 Index_ nidx = indices_ptr->size();
375 return populate_dense_internal<oracle_, DenseIndexed, DensifiedSparseIndexed>(
379 std::move(indices_ptr)
// Myopic full-extent dense extractor: forwards to populate_dense<false>, passing `false`
// in the position where the oracular overload passes its oracle.
384 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
388 return populate_dense<false>(row,
false, opt);
// Myopic block dense extractor: forwards the block start/length to populate_dense<false>,
// passing `false` in the position where the oracular overload passes its oracle.
391 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
397 return populate_dense<false>(row,
false, block_start, block_length, opt);
// Myopic indexed dense extractor: moves the index vector into populate_dense<false>,
// passing `false` in the position where the oracular overload passes its oracle.
400 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
405 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Oracular full-extent dense extractor: moves the oracle `ora` into populate_dense<true>.
412 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
417 return populate_dense<true>(row, std::move(ora), opt);
// Oracular block dense extractor: moves the oracle `ora` into populate_dense<true>
// together with the block start and length.
420 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
427 return populate_dense<true>(row, std::move(ora), block_start, block_length, opt);
// Oracular indexed dense extractor: moves both the oracle `ora` and the index vector
// into populate_dense<true>.
430 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
436 return populate_dense<true>(row, std::move(ora), std::move(indices_ptr), opt);
// Shared factory behind the populate_sparse() overloads.
// FromSparse_ is a template-template parameter naming the extractor class template to
// instantiate; it is instantiated below once with a leading `true` and once with a leading
// `false` argument. The condition choosing between them is on a line not visible in this
// excerpt (presumably `solo` — TODO confirm).
// Returns a unique_ptr to a tatami::SparseExtractor.
445 template<
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
448 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
450 Index_ non_target_length,
// Cache statistics are derived from the largest chunk along the target dimension,
// the estimated chunk count and the byte budget (further arguments are not visible here).
455 Index_ max_target_chunk_length = max_primary_chunk_length(row);
456 tatami_chunked::SlabCacheStats<Index_> stats(
457 max_target_chunk_length,
459 primary_num_chunks(row, max_target_chunk_length),
460 my_cache_size_in_bytes,
462 my_require_minimum_cache
// Chunk map and ticks for the target dimension.
465 const auto& map = chunk_map(row);
466 const auto& ticks = chunk_ticks(row);
// `solo` is set when the cache cannot hold even a single slab.
469 const bool solo = stats.max_slabs_in_cache == 0;
471 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
472#ifdef TATAMI_PYTHON_PARALLELIZE_UNKNOWN
478 new FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
483 std::forward<Args_>(args)...,
484 max_target_chunk_length,
495 new FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
500 std::forward<Args_>(args)...,
501 max_target_chunk_length,
511#ifdef TATAMI_PYTHON_PARALLELIZE_UNKNOWN
// Full-extent sparse factory: uses the whole secondary dimension as the non-target
// extent and dispatches to populate_sparse_internal with the SparseFull extractor template.
// The parameter list is not visible in this excerpt.
518 template<
bool oracle_>
519 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(
524 Index_ non_target_dim = secondary_dim(row);
525 return populate_sparse_internal<oracle_, SparseFull>(
// Block sparse factory: dispatches to populate_sparse_internal with the SparseBlock
// extractor template. The parameter list and call arguments are not visible in this excerpt.
534 template<
bool oracle_>
535 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(
542 return populate_sparse_internal<oracle_, SparseBlock>(
// Indexed sparse factory: dispatches to populate_sparse_internal with the SparseIndexed
// extractor template and hands over ownership of the index vector.
552 template<
bool oracle_>
553 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(
// Read the index count before indices_ptr is moved away below.
559 Index_ nidx = indices_ptr->size();
560 return populate_sparse_internal<oracle_, SparseIndexed>(
565 std::move(indices_ptr)
// Myopic full-extent sparse extractor. Two return paths are visible: one builds a
// tatami::FullSparsifiedWrapper, the other calls populate_sparse<false>. The condition
// selecting between them is not visible in this excerpt (presumably my_sparse — TODO confirm).
570 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
575 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(
581 return populate_sparse<false>(
// Myopic block sparse extractor. One path wraps the dense block extractor in a
// tatami::BlockSparsifiedWrapper; the other calls populate_sparse<false>. The condition
// selecting between them is not visible in this excerpt (presumably my_sparse — TODO confirm).
589 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
596 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(
597 dense(row, block_start, block_length, opt),
603 return populate_sparse<false>(
// Myopic indexed sparse extractor. One path wraps the dense indexed extractor in a
// tatami::IndexSparsifiedWrapper; the other calls populate_sparse<false>. The condition
// selecting between them is not visible in this excerpt (presumably my_sparse — TODO confirm).
613 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
// Copy the index pointer first: the original is moved into dense(), while the wrapper
// also needs its own handle on the indices.
619 auto index_copy = indices_ptr;
620 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(
621 dense(row, std::move(indices_ptr), opt),
622 std::move(index_copy),
626 return populate_sparse<false>(
629 std::move(indices_ptr),
// Oracular full-extent sparse extractor. One path wraps the oracular dense extractor in a
// tatami::FullSparsifiedWrapper; the other calls populate_sparse<true>. The condition
// selecting between them is not visible in this excerpt (presumably my_sparse — TODO confirm).
639 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
645 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(
646 dense(row, std::move(ora), opt),
651 return populate_sparse<true>(
// Oracular block sparse extractor. One path wraps the oracular dense block extractor in a
// tatami::BlockSparsifiedWrapper; the other calls populate_sparse<true>. The condition
// selecting between them is not visible in this excerpt (presumably my_sparse — TODO confirm).
659 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
667 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(
668 dense(row, std::move(ora), block_start, block_length, opt),
674 return populate_sparse<true>(
684 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
691 auto index_copy = indices_ptr;
692 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(
693 dense(row, std::move(ora),
694 std::move(indices_ptr), opt),
695 std::move(index_copy),
699 return populate_sparse<true>(
702 std::move(indices_ptr),