// Member initializers (the constructor signature precedes this excerpt).
// Stores the seed, looks up the DelayedArray and SparseArray package
// namespaces, and fetches the R functions 'extract_array' and
// 'extract_sparse_array' from them. The two extractor functions read the
// environment members, so the environments must be initialized first.
65 my_original_seed(seed),
66 my_delayed_env(Rcpp::Environment::namespace_env(
"DelayedArray")),
67 my_sparse_env(Rcpp::Environment::namespace_env(
"SparseArray")),
68 my_dense_extractor(my_delayed_env[
"extract_array"]),
69 my_sparse_extractor(my_sparse_env[
"extract_sparse_array"])
// Determine the matrix dimensions by calling base R's 'dim' on the seed.
76 const auto base = Rcpp::Environment::base_env();
77 const Rcpp::Function fun = base[
"dim"];
78 const Rcpp::RObject output = fun(seed);
// The result must be an integer vector; otherwise throw, naming the seed's class.
79 if (output.sexp_type() != INTSXP) {
80 auto ctype = get_class_name(my_original_seed);
81 throw std::runtime_error(
"'dim(<" + ctype +
">)' should return an integer vector");
// Only two-dimensional seeds with non-negative extents are accepted.
84 const Rcpp::IntegerVector dims(output);
85 if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) {
86 auto ctype = get_class_name(my_original_seed);
87 throw std::runtime_error(
"'dim(<" + ctype +
">)' should contain two non-negative integers");
// Convert the R integers to Index_.
// NOTE(review): sanisizer::cast presumably checks that the value fits in Index_ - confirm.
93 my_nrow = sanisizer::cast<Index_>(dims[0]);
94 my_ncol = sanisizer::cast<Index_>(dims[1]);
// Ask 'is_sparse' (from the DelayedArray namespace) whether the seed is sparse.
102 const Rcpp::Function fun = my_delayed_env[
"is_sparse"];
103 const Rcpp::LogicalVector is_sparse = fun(seed);
// The answer must be a single logical value.
104 if (is_sparse.size() != 1) {
105 auto ctype = get_class_name(my_original_seed);
106 throw std::runtime_error(
"'is_sparse(<" + ctype +
">)' should return a logical vector of length 1");
// Any non-zero element counts as sparse.
// NOTE(review): an NA logical is stored as a non-zero integer, so it would
// be treated as sparse here - confirm that is acceptable.
108 my_sparse = (is_sparse[0] != 0);
// Query the seed's chunk layout via 'chunkGrid' from the DelayedArray namespace.
115 const Rcpp::Function fun = my_delayed_env[
"chunkGrid"];
116 const Rcpp::RObject grid = fun(seed);
// NULL grid: treat every row and every column as its own chunk of length 1.
118 if (grid == R_NilValue) {
119 my_row_max_chunk_size = 1;
120 my_col_max_chunk_size = 1;
// Map element i to chunk i. The fill covers the maps' existing range only.
// NOTE(review): the maps are presumably sized to my_nrow/my_ncol in lines
// not shown in this excerpt - confirm.
121 std::iota(my_row_chunk_map.begin(), my_row_chunk_map.end(),
static_cast<Index_
>(0));
122 std::iota(my_col_chunk_map.begin(), my_col_chunk_map.end(),
static_cast<Index_
>(0));
// Chunk boundaries are 0, 1, ..., extent, i.e. extent + 1 entries per dimension.
123 my_row_chunk_ticks.resize(sanisizer::sum<
decltype(my_row_chunk_ticks.size())>(my_nrow, 1));
124 std::iota(my_row_chunk_ticks.begin(), my_row_chunk_ticks.end(),
static_cast<Index_
>(0));
125 my_col_chunk_ticks.resize(sanisizer::sum<
decltype(my_col_chunk_ticks.size())>(my_ncol, 1));
126 std::iota(my_col_chunk_ticks.begin(), my_col_chunk_ticks.end(),
static_cast<Index_
>(0));
// With no chunk information, column access is preferred.
131 my_prefer_rows =
false;
// Non-NULL grid: dispatch on the class name of the grid object.
134 auto grid_cls = get_class_name(grid);
// RegularArrayGrid: a fixed chunk spacing per dimension, read from the 'spacings' slot.
136 if (grid_cls ==
"RegularArrayGrid") {
137 const Rcpp::IntegerVector spacings(Rcpp::RObject(grid.slot(
"spacings")));
// NOTE(review): only the length is checked in the lines visible here, although
// the message also promises non-negative values - confirm whether the values
// are validated in lines not shown.
138 if (spacings.size() != 2) {
139 auto ctype = get_class_name(seed);
140 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@spacings' should be an integer vector of length 2 with non-negative values");
// Helper that fills the element-to-chunk map and the chunk boundaries for one
// dimension. Its first parameter ('extent') and the declaration/advance of
// 'start' are on lines not shown in this excerpt.
143 const auto populate = [](
145 const Index_ spacing,
146 std::vector<Index_>& map,
147 std::vector<Index_>& ticks
// Room for ceil(extent / spacing) chunk starts plus the final boundary.
// NOTE(review): a zero spacing would divide by zero here unless it is
// rejected in lines not shown - confirm.
152 ticks.reserve((extent / spacing) + (extent % spacing > 0) + 1);
154 ticks.push_back(start);
155 while (start != extent) {
// The last chunk may be shorter than 'spacing'.
156 auto to_fill = std::min(spacing, extent - start);
// ticks.size() - 1 is the index of the chunk currently being filled.
157 std::fill_n(map.begin() + start, to_fill, ticks.size() - 1);
159 ticks.push_back(start);
// For a regular grid, the maximum chunk size is the spacing itself.
164 my_row_max_chunk_size = spacings[0];
165 populate(my_nrow, my_row_max_chunk_size, my_row_chunk_map, my_row_chunk_ticks);
166 my_col_max_chunk_size = spacings[1];
167 populate(my_ncol, my_col_max_chunk_size, my_col_chunk_map, my_col_chunk_ticks);
// ArbitraryArrayGrid: explicit chunk end positions per dimension, read from
// the 'tickmarks' slot.
169 }
else if (grid_cls ==
"ArbitraryArrayGrid") {
170 const Rcpp::List ticks(Rcpp::RObject(grid.slot(
"tickmarks")));
171 if (ticks.size() != 2) {
172 auto ctype = get_class_name(seed);
173 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@tickmarks' should return a list of length 2");
// Helper that converts one dimension's tickmarks into the element-to-chunk
// map, the boundary vector and the maximum chunk size. Its first parameter
// ('extent') and the 'start'/'counter' bookkeeping are on lines not shown.
176 const auto populate = [](
178 const Rcpp::IntegerVector& ticks,
179 std::vector<Index_>& map,
180 std::vector<Index_>& new_ticks,
181 Index_& max_chunk_size
// When tickmarks are present, the last one must coincide with the extent.
183 if (ticks.size() != 0 && ticks[ticks.size() - 1] !=
static_cast<int>(extent)) {
184 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
// One extra leading slot, then the R tickmarks are copied in from position 1.
// NOTE(review): slot 0 is not assigned in the lines visible here; it is
// presumably left at zero as the first boundary - confirm.
186 new_ticks.resize(sanisizer::sum<
decltype(new_ticks.size())>(ticks.size(), 1));
187 std::copy(ticks.begin(), ticks.end(), new_ticks.begin() + 1);
// Walk the tickmarks; each one ends a chunk that begins at 'start'.
// NOTE(review): the condition guarding the throw below is not shown.
194 for (
auto t : ticks) {
196 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
// Chunk length; max_chunk_size is only ever raised here.
// NOTE(review): it is presumably zeroed before the loop in lines not shown - confirm.
198 Index_ to_fill = t - start;
199 if (to_fill > max_chunk_size) {
200 max_chunk_size = to_fill;
202 std::fill_n(map.begin() + start, to_fill, counter);
// First list element describes the rows, second the columns.
208 Rcpp::IntegerVector first(ticks[0]);
209 populate(my_nrow, first, my_row_chunk_map, my_row_chunk_ticks, my_row_max_chunk_size);
210 Rcpp::IntegerVector second(ticks[1]);
211 populate(my_ncol, second, my_col_chunk_map, my_col_chunk_ticks, my_col_max_chunk_size);
// Any other grid class is rejected.
// NOTE(review): the message opens a quote before "chunkGrid(" but never
// closes it after ">)" - consider fixing the text.
214 auto ctype = get_class_name(seed);
215 throw std::runtime_error(
"instance of unknown class '" + grid_cls +
"' returned by 'chunkGrid(<" + ctype +
">)");
// A row spans one chunk per column chunk, and a column one per row chunk.
// Each tick vector holds (number of chunks + 1) boundaries.
219 const auto chunks_per_row = my_col_chunk_ticks.size() - 1;
220 const auto chunks_per_col = my_row_chunk_ticks.size() - 1;
// Prefer row access when a row touches no more chunks than a column does.
221 my_prefer_rows = chunks_per_row <= chunks_per_col;
// Cache size taken from the caller-supplied options.
// NOTE(review): the dereference implies this runs only when
// opt.maximum_cache_size holds a value; the guarding condition is not shown.
227 my_cache_size_in_bytes = *(opt.maximum_cache_size);
// Otherwise use DelayedArray's 'getAutoBlockSize()' as the cache size.
229 Rcpp::Function fun = my_delayed_env[
"getAutoBlockSize"];
230 Rcpp::NumericVector bsize = fun();
// The result must be a single non-negative number ...
231 if (bsize.size() != 1 || bsize[0] < 0) {
232 throw std::runtime_error(
"'getAutoBlockSize()' should return a non-negative number of bytes");
233 }
// ... that fits in std::size_t.
// NOTE(review): the comparison is done in double; with a 64-bit size_t the
// maximum rounds up on conversion, so a value exactly at that rounded limit
// would pass this check - confirm whether that edge matters.
else if (bsize[0] > std::numeric_limits<std::size_t>::max()) {
234 throw std::runtime_error(
"integer overflow from the current value of 'getAutoBlockSize()'");
236 my_cache_size_in_bytes = bsize[0];
// Matrix extents, taken from dim(seed) in the constructor.
249 Index_ my_nrow, my_ncol;
// Whether is_sparse(seed) reported sparsity, and whether row access is preferred.
250 bool my_sparse, my_prefer_rows;
// For each row/column, the index of the chunk that contains it.
252 std::vector<Index_> my_row_chunk_map, my_col_chunk_map;
// Chunk boundaries along each dimension (number of chunks + 1 entries).
253 std::vector<Index_> my_row_chunk_ticks, my_col_chunk_ticks;
// Length of the largest chunk along each dimension.
263 Index_ my_row_max_chunk_size, my_col_max_chunk_size;
// Cache budget in bytes, and a flag forwarded to tatami_chunked::SlabCacheStats.
// NOTE(review): my_require_minimum_cache is assigned in lines not shown here.
265 std::size_t my_cache_size_in_bytes;
266 bool my_require_minimum_cache;
// The wrapped R object, the package namespaces, and the extraction functions.
// The environments are declared before the functions that are looked up in
// them, which matches the order the constructor's initializer list relies on.
268 Rcpp::RObject my_original_seed;
269 Rcpp::Environment my_delayed_env, my_sparse_env;
270 Rcpp::Function my_dense_extractor, my_sparse_extractor;
// Basic matrix property accessors.
// NOTE(review): the bodies of nrow(), ncol(), is_sparse() and uses_oracle()
// are not visible in this excerpt; presumably nrow()/ncol()/is_sparse()
// return the corresponding members - confirm.
273 Index_ nrow()
const {
277 Index_ ncol()
const {
281 bool is_sparse()
const {
// Sparsity expressed as a proportion: the boolean flag converted to double.
285 double is_sparse_proportion()
const {
286 return static_cast<double>(my_sparse);
// Whether row-wise access is preferred, as decided in the constructor.
289 bool prefer_rows()
const {
290 return my_prefer_rows;
// Row preference expressed as a proportion: the boolean flag converted to double.
293 double prefer_rows_proportion()
const {
294 return static_cast<double>(my_prefer_rows);
// The (unnamed) bool argument is ignored; the return value is not shown here.
297 bool uses_oracle(
bool)
const {
// Largest chunk length along the target dimension: rows if 'row', else columns.
302 Index_ max_primary_chunk_length(
const bool row)
const {
303 return (row ? my_row_max_chunk_size : my_col_max_chunk_size);
// Number of chunks along the target dimension for the given chunk length.
// A zero chunk length is handled separately to avoid dividing by zero; the
// value returned in that branch is not shown in this excerpt.
// NOTE(review): if Index_ is an integer type this division truncates, so a
// trailing partial chunk is not counted - confirm that is intended.
306 Index_ primary_num_chunks(
const bool row,
const Index_ primary_chunk_length)
const {
307 auto primary_dim = (row ? my_nrow : my_ncol);
308 if (primary_chunk_length == 0) {
311 return primary_dim / primary_chunk_length;
// Extent of the non-target dimension: columns when iterating rows, and vice versa.
315 Index_ secondary_dim(
const bool row)
const {
316 return (row ? my_ncol : my_nrow);
// Chunk boundaries for the requested dimension.
// NOTE(review): the branch on 'row' is not visible; presumably the row
// vector is returned when 'row' is true - confirm.
319 const std::vector<Index_>& chunk_ticks(
const bool row)
const {
321 return my_row_chunk_ticks;
323 return my_col_chunk_ticks;
// Element-to-chunk map for the requested dimension (same selection as above).
327 const std::vector<Index_>& chunk_map(
const bool row)
const {
329 return my_row_chunk_map;
331 return my_col_chunk_map;
// Shared builder for all dense extractors. It is parameterized by two class
// templates: one for a dense seed (five template parameters) and one for a
// sparse seed (six). The leading template parameters and several function
// parameters are on lines not shown in this excerpt.
341 template <
bool,
bool,
typename,
typename,
typename>
class FromDense_,
342 template <
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
345 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(
347 const Index_ non_target_length,
351 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;
// Size the slab cache from the largest chunk along the target dimension, the
// chunk count, the byte budget and the size of one cached value.
353 const Index_ max_target_chunk_length = max_primary_chunk_length(row);
354 tatami_chunked::SlabCacheStats<Index_> stats(
355 max_target_chunk_length,
357 primary_num_chunks(row, max_target_chunk_length),
358 my_cache_size_in_bytes,
359 sizeof(CachedValue_),
360 my_require_minimum_cache
363 const auto& map = chunk_map(row);
364 const auto& ticks = chunk_ticks(row);
// 'solo' is set when the cache cannot hold even a single slab.
365 const bool solo = (stats.max_slabs_in_cache == 0);
// NOTE(review): under this macro the construction is routed through
// mexec.run(...), presumably so the R calls happen on the main thread - confirm.
367#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
370 mexec.run([&]() ->
void {
// Four candidate extractors: dense or sparse source, each instantiated with
// the first template flag true or false.
// NOTE(review): the selecting conditions are not shown; presumably they are
// my_sparse and 'solo' - confirm.
376 new FromDense_<true, oracle_, Value_, Index_, CachedValue_>(
381 std::forward<Args_>(args)...,
390 new FromDense_<false, oracle_, Value_, Index_, CachedValue_>(
395 std::forward<Args_>(args)...,
// The sparse-backed variants additionally receive the maximum target chunk length.
406 new FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
411 std::forward<Args_>(args)...,
412 max_target_chunk_length,
421 new FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
426 std::forward<Args_>(args)...,
427 max_target_chunk_length,
436#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// Full-extent dense extraction: the non-target length is the whole secondary
// dimension. Delegates to populate_dense_internal with the DenseFull and
// DensifiedSparseFull implementations.
443 template<
bool oracle_>
444 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(
449 const Index_ non_target_dim = secondary_dim(row);
450 return populate_dense_internal<oracle_, DenseFull, DensifiedSparseFull>(
// Block dense extraction over block_length elements beginning at block_start
// on the non-target dimension.
458 template<
bool oracle_>
459 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(
462 const Index_ block_start,
463 const Index_ block_length,
466 return populate_dense_internal<oracle_, DenseBlock, DensifiedSparseBlock>(
// Indexed dense extraction. The index count is read before indices_ptr is
// moved into the callee.
475 template<
bool oracle_>
476 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(
482 const Index_ nidx = indices_ptr->size();
483 return populate_dense_internal<oracle_, DenseIndexed, DensifiedSparseIndexed>(
487 std::move(indices_ptr)
// Myopic (no oracle) dense extractors: full, block and indexed. Each passes
// 'false' in the oracle position of populate_dense<false>.
492 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
496 return populate_dense<false>(row,
false, opt);
499 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
501 const Index_ block_start,
502 const Index_ block_length,
505 return populate_dense<false>(row,
false, block_start, block_length, opt);
508 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
513 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Oracular dense extractors: same three shapes, with the oracle moved into
// populate_dense<true>.
520 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
525 return populate_dense<true>(row, std::move(ora), opt);
528 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
531 const Index_ block_start,
532 const Index_ block_length,
535 return populate_dense<true>(row, std::move(ora), block_start, block_length, opt);
538 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
544 return populate_dense<true>(row, std::move(ora), std::move(indices_ptr), opt);
// Shared builder for all sparse extractors, parameterized by the sparse
// implementation class template (six template parameters). The leading
// template parameters and several function parameters are on lines not shown.
553 template<
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
556 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
558 const Index_ non_target_length,
// Size the slab cache from the largest chunk along the target dimension, the
// chunk count and the byte budget (the element-size argument is not shown).
563 const Index_ max_target_chunk_length = max_primary_chunk_length(row);
564 tatami_chunked::SlabCacheStats<Index_> stats(
565 max_target_chunk_length,
567 primary_num_chunks(row, max_target_chunk_length),
568 my_cache_size_in_bytes,
570 my_require_minimum_cache
573 const auto& map = chunk_map(row);
574 const auto& ticks = chunk_ticks(row);
// 'solo' is set when the cache cannot hold even a single slab.
577 const bool solo = stats.max_slabs_in_cache == 0;
579 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
// NOTE(review): under this macro the construction is routed through
// mexec.run(...), presumably so the R calls happen on the main thread - confirm.
581#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
584 mexec.run([&]() ->
void {
// Two candidate extractors, instantiated with the first template flag true or false.
// NOTE(review): the selecting condition is not shown; presumably it is 'solo' - confirm.
589 new FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
594 std::forward<Args_>(args)...,
595 max_target_chunk_length,
606 new FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
611 std::forward<Args_>(args)...,
612 max_target_chunk_length,
622#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// Full-extent sparse extraction: the non-target length is the whole secondary
// dimension. Delegates to populate_sparse_internal with SparseFull.
629 template<
bool oracle_>
630 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(
635 const Index_ non_target_dim = secondary_dim(row);
636 return populate_sparse_internal<oracle_, SparseFull>(
// Block sparse extraction over block_length elements beginning at block_start.
645 template<
bool oracle_>
646 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(
649 const Index_ block_start,
650 const Index_ block_length,
653 return populate_sparse_internal<oracle_, SparseBlock>(
// Indexed sparse extraction; indices_ptr is moved into the callee.
663 template<
bool oracle_>
664 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(
670 return populate_sparse_internal<oracle_, SparseIndexed>(
675 std::move(indices_ptr)
// Myopic sparse extractors. Each overload has two return paths: wrap the
// matching dense extractor in a tatami *SparsifiedWrapper, or build a true
// sparse extractor via populate_sparse<false>.
// NOTE(review): the condition choosing between the two paths is not shown;
// presumably the wrapper is used when the seed is not sparse - confirm.
680 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
685 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(
691 return populate_sparse<false>(row,
false, opt);
695 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
697 const Index_ block_start,
698 const Index_ block_length,
702 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(
703 dense(row, block_start, block_length, opt),
709 return populate_sparse<false>(row,
false, block_start, block_length, opt);
713 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
// Copy the index pointer first: the original is moved into dense(), while
// the wrapper needs its own handle to the same indices.
719 auto index_copy = indices_ptr;
720 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(
721 dense(row, std::move(indices_ptr), opt),
722 std::move(index_copy),
726 return populate_sparse<false>(row,
false, std::move(indices_ptr), opt);
// Oracular sparse extractors: the same structure, with the oracle moved into
// whichever path is taken.
734 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
740 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(
741 dense(row, std::move(ora), opt),
746 return populate_sparse<true>(row, std::move(ora), opt);
750 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
753 const Index_ block_start,
754 const Index_ block_length,
758 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(
759 dense(row, std::move(ora), block_start, block_length, opt),
765 return populate_sparse<true>(row, std::move(ora), block_start, block_length, opt);
769 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
// As in the myopic overload, keep a copy of the index pointer for the
// wrapper before the original is moved into dense().
776 auto index_copy = indices_ptr;
777 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(
778 dense(row, std::move(ora), std::move(indices_ptr), opt),
779 std::move(index_copy),
783 return populate_sparse<true>(row, std::move(ora), std::move(indices_ptr), opt);