// Member initializers (the constructor signature that precedes them is not part of this excerpt).
// Keeps the seed, resolves the "DelayedArray" and "SparseArray" R namespaces, and
// looks up the "extract_array" / "extract_sparse_array" entries in those namespaces.
60 my_original_seed(seed),
61 my_delayed_env(Rcpp::Environment::namespace_env(
"DelayedArray")),
62 my_sparse_env(Rcpp::Environment::namespace_env(
"SparseArray")),
63 my_dense_extractor(my_delayed_env[
"extract_array"]),
64 my_sparse_extractor(my_sparse_env[
"extract_sparse_array"])
// Call R's base 'dim' on the seed and validate the answer before storing the extents.
71 auto base = Rcpp::Environment::base_env();
72 Rcpp::Function fun = base[
"dim"];
73 Rcpp::RObject output = fun(seed);
// The result has to be an integer vector (INTSXP) ...
74 if (output.sexp_type() != INTSXP) {
75 auto ctype = get_class_name(my_original_seed);
76 throw std::runtime_error(
"'dim(<" + ctype +
">)' should return an integer vector");
// ... containing exactly two non-negative extents.
78 Rcpp::IntegerVector dims(output);
79 if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) {
80 auto ctype = get_class_name(my_original_seed);
81 throw std::runtime_error(
"'dim(<" + ctype +
">)' should contain two non-negative integers");
// Convert the R integers to Index_ through sanisizer::cast.
// NOTE(review): presumably this rejects values that do not fit in Index_ - confirm against sanisizer.
87 my_nrow = sanisizer::cast<Index_>(dims[0]);
88 my_ncol = sanisizer::cast<Index_>(dims[1]);
// Ask the 'is_sparse' function from the DelayedArray namespace about the seed;
// the answer must be a logical vector of length 1.
92 Rcpp::Function fun = my_delayed_env[
"is_sparse"];
93 Rcpp::LogicalVector is_sparse = fun(seed);
94 if (is_sparse.size() != 1) {
95 auto ctype = get_class_name(my_original_seed);
96 throw std::runtime_error(
"'is_sparse(<" + ctype +
">)' should return a logical vector of length 1");
// Any non-zero element is treated as "sparse".
// NOTE(review): an NA logical would presumably also be non-zero here - confirm that is acceptable.
98 my_sparse = (is_sparse[0] != 0);
// Ask the 'chunkGrid' function from the DelayedArray namespace for the seed's chunk layout.
105 Rcpp::Function fun = my_delayed_env[
"chunkGrid"];
106 Rcpp::RObject grid = fun(seed);
// No grid (NULL): every row and every column is treated as its own chunk of length 1.
108 if (grid == R_NilValue) {
109 my_row_max_chunk_size = 1;
110 my_col_max_chunk_size = 1;
// Identity maps: element i belongs to chunk i.
// NOTE(review): iota only fills existing elements, so the maps must already be sized;
// that sizing is not part of this excerpt.
111 std::iota(my_row_chunk_map.begin(), my_row_chunk_map.end(),
static_cast<Index_
>(0));
112 std::iota(my_col_chunk_map.begin(), my_col_chunk_map.end(),
static_cast<Index_
>(0));
// Tick marks run 0, 1, ..., extent, hence extent + 1 entries per dimension.
// NOTE(review): sanisizer::sum presumably guards the addition against overflow - confirm.
113 my_row_chunk_ticks.resize(sanisizer::sum<
decltype(my_row_chunk_ticks.size())>(my_nrow, 1));
114 std::iota(my_row_chunk_ticks.begin(), my_row_chunk_ticks.end(),
static_cast<Index_
>(0));
115 my_col_chunk_ticks.resize(sanisizer::sum<
decltype(my_col_chunk_ticks.size())>(my_ncol, 1));
116 std::iota(my_col_chunk_ticks.begin(), my_col_chunk_ticks.end(),
static_cast<Index_
>(0));
// Without chunk information, column access is preferred.
121 my_prefer_rows =
false;
// A grid object was returned: dispatch on its class name.
124 auto grid_cls = get_class_name(grid);
// Regular grid: the 'spacings' slot holds one chunk length per dimension.
126 if (grid_cls ==
"RegularArrayGrid") {
127 Rcpp::IntegerVector spacings(Rcpp::RObject(grid.slot(
"spacings")));
// NOTE(review): only the length is tested on the visible lines, although the
// message also mentions non-negative values.
128 if (spacings.size() != 2) {
129 auto ctype = get_class_name(seed);
130 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@spacings' should be an integer vector of length 2 with non-negative values");
// Helper for one dimension of length 'extent' cut into chunks of length 'spacing':
// fills 'map' (element -> chunk index) and 'ticks' (chunk boundaries).
// NOTE(review): some lines of this lambda (including the declaration and update of
// 'start') are not part of this excerpt.
133 auto populate = [](Index_ extent, Index_ spacing, std::vector<Index_>& map, std::vector<Index_>& ticks) {
// Number of full chunks, plus one if there is a partial trailing chunk, plus the leading tick.
137 ticks.reserve((extent / spacing) + (extent % spacing > 0) + 1);
139 ticks.push_back(start);
140 while (start != extent) {
// The final chunk may be shorter than 'spacing'.
141 auto to_fill = std::min(spacing, extent - start);
// ticks.size() - 1 is the index of the chunk that begins at 'start'.
142 std::fill_n(map.begin() + start, to_fill, ticks.size() - 1);
144 ticks.push_back(start);
// For a regular grid the spacing is also stored as the maximum chunk length of that dimension.
149 my_row_max_chunk_size = spacings[0];
150 populate(my_nrow, my_row_max_chunk_size, my_row_chunk_map, my_row_chunk_ticks);
151 my_col_max_chunk_size = spacings[1];
152 populate(my_ncol, my_col_max_chunk_size, my_col_chunk_map, my_col_chunk_ticks);
154 }
// Arbitrary grid: the 'tickmarks' slot is read as a list with one integer vector per dimension.
else if (grid_cls ==
"ArbitraryArrayGrid") {
155 Rcpp::List ticks(Rcpp::RObject(grid.slot(
"tickmarks")));
156 if (ticks.size() != 2) {
157 auto ctype = get_class_name(seed);
158 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@tickmarks' should return a list of length 2");
// Helper for one dimension: validates the tick marks and fills 'map' (element -> chunk index),
// 'new_ticks' (chunk boundaries) and 'max_chunk_size'.
161 auto populate = [](Index_ extent,
const Rcpp::IntegerVector& ticks, std::vector<Index_>& map, std::vector<Index_>& new_ticks, Index_& max_chunk_size) {
// The tick marks must be non-empty and the last one must equal the extent.
162 if (ticks.size() == 0 || ticks[ticks.size() - 1] !=
static_cast<int>(extent)) {
163 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
// 'new_ticks' gets one entry more than 'ticks'; the copy starts at position 1, so
// position 0 keeps whatever resize() left there.
165 new_ticks.resize(sanisizer::sum<
decltype(new_ticks.size())>(ticks.size(), 1));
166 std::copy(ticks.begin(), ticks.end(), new_ticks.begin() + 1);
// Walk the tick marks one chunk at a time.
// NOTE(review): the condition guarding the throw below, and the updates of 'start'
// and 'counter', are on lines that are not part of this excerpt.
173 for (
auto t : ticks) {
175 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
// Length of the chunk that ends at tick 't'.
177 Index_ to_fill = t - start;
// Remember the longest chunk seen so far.
178 if (to_fill > max_chunk_size) {
179 max_chunk_size = to_fill;
// Every element of this chunk maps to chunk index 'counter'.
181 std::fill_n(map.begin() + start, to_fill, counter);
// The first list element is applied to the rows, the second to the columns.
187 Rcpp::IntegerVector first(ticks[0]);
188 populate(my_nrow, first, my_row_chunk_map, my_row_chunk_ticks, my_row_max_chunk_size);
189 Rcpp::IntegerVector second(ticks[1]);
190 populate(my_ncol, second, my_col_chunk_map, my_col_chunk_ticks, my_col_max_chunk_size);
// Error raised for a grid class that is not handled.
193 auto ctype = get_class_name(seed);
194 throw std::runtime_error(
"instance of unknown class '" + grid_cls +
"' returned by 'chunkGrid(<" + ctype +
">)");
// The tick vectors hold one more entry than there are chunks. A row crosses one chunk per
// column chunk, and a column crosses one chunk per row chunk.
198 auto chunks_per_row = my_col_chunk_ticks.size() - 1;
199 auto chunks_per_col = my_row_chunk_ticks.size() - 1;
// Prefer row access when a row crosses no more chunks than a column does.
200 my_prefer_rows = chunks_per_row <= chunks_per_col;
// Take the cache size from 'opt.maximum_cache_size' (dereferenced here).
// NOTE(review): the condition choosing between this assignment and the fallback below
// is not part of this excerpt.
206 my_cache_size_in_bytes = *(opt.maximum_cache_size);
// Fallback: ask the 'getAutoBlockSize' function from the DelayedArray namespace.
208 Rcpp::Function fun = my_delayed_env[
"getAutoBlockSize"];
209 Rcpp::NumericVector bsize = fun();
// The answer must be a single non-negative number ...
210 if (bsize.size() != 1 || bsize[0] < 0) {
211 throw std::runtime_error(
"'getAutoBlockSize()' should return a non-negative number of bytes");
212 }
// ... that is not larger than the maximum std::size_t.
else if (bsize[0] > std::numeric_limits<std::size_t>::max()) {
213 throw std::runtime_error(
"integer overflow from the current value of 'getAutoBlockSize()'");
// The numeric value is converted to the std::size_t member on assignment.
215 my_cache_size_in_bytes = bsize[0];
// Matrix extents, as obtained from dim() on the seed.
228 Index_ my_nrow, my_ncol;
// Sparsity flag reported for the seed, and whether row access is preferred.
229 bool my_sparse, my_prefer_rows;
// Per dimension: lookup from an element index to the index of its chunk.
231 std::vector<Index_> my_row_chunk_map, my_col_chunk_map;
// Per dimension: chunk boundaries (one more entry than the number of chunks).
232 std::vector<Index_> my_row_chunk_ticks, my_col_chunk_ticks;
// Per dimension: length of the longest chunk.
242 Index_ my_row_max_chunk_size, my_col_max_chunk_size;
// Cache budget in bytes, and a flag that is forwarded to tatami_chunked::SlabCacheStats.
244 std::size_t my_cache_size_in_bytes;
245 bool my_require_minimum_cache;
// The R seed object plus the R namespaces and functions used to extract data from it.
247 Rcpp::RObject my_original_seed;
248 Rcpp::Environment my_delayed_env, my_sparse_env;
249 Rcpp::Function my_dense_extractor, my_sparse_extractor;
// Row-count accessor.
// NOTE(review): the body is not part of this excerpt; presumably it returns my_nrow.
252 Index_ nrow()
const {
// Column-count accessor.
// NOTE(review): the body is not part of this excerpt; presumably it returns my_ncol.
256 Index_ ncol()
const {
// Sparsity query.
// NOTE(review): the body is not part of this excerpt; presumably it returns my_sparse.
260 bool is_sparse()
const {
// The sparsity flag expressed as a double: 1.0 when my_sparse is true, 0.0 otherwise.
264 double is_sparse_proportion()
const {
265 return static_cast<double>(my_sparse);
// Whether row access is preferred, as decided when the chunk layout was examined.
268 bool prefer_rows()
const {
269 return my_prefer_rows;
// The row-preference flag expressed as a double: 1.0 when my_prefer_rows is true, 0.0 otherwise.
272 double prefer_rows_proportion()
const {
273 return static_cast<double>(my_prefer_rows);
// Oracle-usage query; the bool parameter is unnamed in the signature.
// NOTE(review): the body is not part of this excerpt, so the returned value cannot be confirmed here.
276 bool uses_oracle(
bool)
const {
// Longest chunk length along the rows when 'row' is true, otherwise along the columns.
281 Index_ max_primary_chunk_length(
bool row)
const {
282 return (row ? my_row_max_chunk_size : my_col_max_chunk_size);
// Number of chunks of length 'primary_chunk_length' along the rows ('row' true) or columns.
285 Index_ primary_num_chunks(
bool row, Index_ primary_chunk_length)
const {
286 auto primary_dim = (row ? my_nrow : my_ncol);
// A zero chunk length is handled separately, which keeps it out of the division below.
// NOTE(review): the value returned in that case is not part of this excerpt.
287 if (primary_chunk_length == 0) {
// NOTE(review): for an integer Index_ this division truncates, so a trailing partial
// chunk would not be counted - confirm that is intended.
290 return primary_dim / primary_chunk_length;
// Extent of the other dimension: the column count when 'row' is true, otherwise the row count.
294 Index_ secondary_dim(
bool row)
const {
295 return (row ? my_ncol : my_nrow);
// Chunk boundaries for the rows or for the columns, returned by const reference.
// NOTE(review): the test that selects between the two returns is not part of this
// excerpt; presumably it is on 'row'.
298 const std::vector<Index_>& chunk_ticks(
bool row)
const {
300 return my_row_chunk_ticks;
302 return my_col_chunk_ticks;
// Element-to-chunk lookup for the rows or for the columns, returned by const reference.
// NOTE(review): the test that selects between the two returns is not part of this
// excerpt; presumably it is on 'row'.
306 const std::vector<Index_>& chunk_map(
bool row)
const {
308 return my_row_chunk_map;
310 return my_col_chunk_map;
// Template header fragment (its opening is not part of this excerpt). FromDense_ and
// FromSparse_ are the class templates instantiated below to build the extractor.
320 template <
bool,
bool,
typename,
typename,
typename>
class FromDense_,
321 template <
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
// Builds a dense extractor for the dimension selected by 'row'. 'oracle' and 'args' are
// forwarded to the constructor of the chosen extractor class.
324 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(
bool row, Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> oracle, Args_&& ... args)
const {
325 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;
// Cache statistics are derived from the longest chunk along the target dimension, the
// number of such chunks, the byte budget and the size of one cached value.
// NOTE(review): one constructor argument sits on a line that is not part of this excerpt.
327 Index_ max_target_chunk_length = max_primary_chunk_length(row);
328 tatami_chunked::SlabCacheStats<Index_> stats(
329 max_target_chunk_length,
331 primary_num_chunks(row, max_target_chunk_length),
332 my_cache_size_in_bytes,
333 sizeof(CachedValue_),
334 my_require_minimum_cache
337 const auto& map = chunk_map(row);
338 const auto& ticks = chunk_ticks(row);
// 'solo' is set when the cache cannot hold a single slab.
339 bool solo = (stats.max_slabs_in_cache == 0);
// With TATAMI_R_PARALLELIZE_UNKNOWN defined, the construction below runs inside executor().run().
// NOTE(review): presumably this keeps R API calls on the main thread - confirm.
341#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
343 auto& mexec = executor();
344 mexec.run([&]() ->
void {
// Four possible constructions: FromDense_ or FromSparse_, each with 'true' or 'false' as first
// template argument.
// NOTE(review): the conditions choosing among them are not part of this excerpt; presumably
// my_sparse picks the class and 'solo' picks the first template argument.
349 typedef FromDense_<true, oracle_, Value_, Index_, CachedValue_> ShortDense;
350 output.reset(
new ShortDense(my_original_seed, my_dense_extractor, row, std::move(oracle), std::forward<Args_>(args)..., ticks, map, stats));
352 typedef FromDense_<false, oracle_, Value_, Index_, CachedValue_> ShortDense;
353 output.reset(
new ShortDense(my_original_seed, my_dense_extractor, row, std::move(oracle), std::forward<Args_>(args)..., ticks, map, stats));
// The sparse-backed variants additionally receive max_target_chunk_length.
357 typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
358 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
360 typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
361 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
365#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// Three overload bodies that forward to populate_dense_internal (their signatures are not
// part of this excerpt).
// Full extent: the non-target length is the whole secondary dimension.
372 template<
bool oracle_>
374 Index_ non_target_dim = secondary_dim(row);
375 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseFull, UnknownMatrix_internal::DensifiedSparseFull>(row, non_target_dim, std::move(ora), non_target_dim);
// Contiguous block: the non-target length is block_length.
378 template<
bool oracle_>
380 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseBlock, UnknownMatrix_internal::DensifiedSparseBlock>(row, block_length, std::move(ora), block_start, block_length);
// Indexed subset: the non-target length is the number of indices.
383 template<
bool oracle_>
385 Index_ nidx = indices_ptr->size();
386 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseIndexed, UnknownMatrix_internal::DensifiedSparseIndexed>(row, nidx, std::move(ora), std::move(indices_ptr));
// Myopic dense extractors: each forwards to populate_dense<false>, passing 'false' in the
// oracle position.
// Full extent.
390 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
391 return populate_dense<false>(row,
false, opt);
// Contiguous block [block_start, block_start + block_length).
394 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
395 return populate_dense<false>(row,
false, block_start, block_length, opt);
// Indexed subset (signature not part of this excerpt).
399 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Oracular dense extractors: each forwards to populate_dense<true> with the supplied oracle.
// Full extent (signature not part of this excerpt).
407 return populate_dense<true>(row, std::move(ora), opt);
// Contiguous block.
410 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > ora, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
411 return populate_dense<true>(row, std::move(ora), block_start, block_length, opt);
// Indexed subset (signature not part of this excerpt).
415 return populate_dense<true>(row, std::move(ora), std::move(indices_ptr), opt);
// Template header fragment (its opening is not part of this excerpt). FromSparse_ is the
// class template instantiated below to build the extractor.
424 template<
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
// Builds a sparse extractor for the dimension selected by 'row'.
// NOTE(review): most of the parameter list is not part of this excerpt.
427 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
429 Index_ non_target_length,
434 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
// Cache statistics are derived from the longest chunk along the target dimension, the
// number of such chunks and the byte budget.
// NOTE(review): some constructor arguments sit on lines that are not part of this excerpt.
436 Index_ max_target_chunk_length = max_primary_chunk_length(row);
437 tatami_chunked::SlabCacheStats<Index_> stats(
438 max_target_chunk_length,
440 primary_num_chunks(row, max_target_chunk_length),
441 my_cache_size_in_bytes,
443 my_require_minimum_cache
446 const auto& map = chunk_map(row);
447 const auto& ticks = chunk_ticks(row);
// 'solo' is set when the cache cannot hold a single slab.
450 bool solo = stats.max_slabs_in_cache == 0;
// With TATAMI_R_PARALLELIZE_UNKNOWN defined, the construction below runs inside executor().run().
// NOTE(review): presumably this keeps R API calls on the main thread - confirm.
452#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
454 auto& mexec = executor();
455 mexec.run([&]() ->
void {
// Two possible constructions that differ only in the first template argument.
// NOTE(review): the selecting condition is not part of this excerpt; presumably it is 'solo'.
// 'needs_value' and 'needs_index' are defined on lines not shown here.
459 typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
460 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
462 typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
463 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
466#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// Three overload bodies that forward to populate_sparse_internal (their signatures are not
// part of this excerpt).
// Full extent: the non-target length is the whole secondary dimension.
473 template<
bool oracle_>
475 Index_ non_target_dim = secondary_dim(row);
476 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseFull>(row, non_target_dim, std::move(ora), opt, non_target_dim);
// Contiguous block: the non-target length is block_length.
479 template<
bool oracle_>
481 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseBlock>(row, block_length, std::move(ora), opt, block_start, block_length);
// Indexed subset: the non-target length is the number of indices.
484 template<
bool oracle_>
486 Index_ nidx = indices_ptr->size();
487 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseIndexed>(row, nidx, std::move(ora), opt, std::move(indices_ptr));
// Sparse extractors. Each overload has two returns: one wraps the matching dense extractor
// in a tatami *SparsifiedWrapper, the other builds a sparse extractor through populate_sparse.
// NOTE(review): the condition choosing between them is not part of this excerpt;
// presumably the wrapper is used when the seed is not sparse.
// Myopic, full extent.
491 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
493 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), secondary_dim(row), opt);
495 return populate_sparse<false>(row,
false, opt);
// Myopic, contiguous block.
499 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
501 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
503 return populate_sparse<false>(row,
false, block_start, block_length, opt);
// Myopic, indexed subset (signature not part of this excerpt). The index pointer is copied
// first because it is moved into dense() and the wrapper needs it as well.
509 auto index_copy = indices_ptr;
510 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(dense(row, std::move(indices_ptr), opt), std::move(index_copy), opt);
512 return populate_sparse<false>(row,
false, std::move(indices_ptr), opt);
// Oracular, full extent (signature not part of this excerpt).
522 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), opt), secondary_dim(row), opt);
524 return populate_sparse<true>(row, std::move(ora), opt);
// Oracular, contiguous block.
528 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > ora, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
530 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), block_start, block_length, opt), block_start, block_length, opt);
532 return populate_sparse<true>(row, std::move(ora), block_start, block_length, opt);
// Oracular, indexed subset (signature not part of this excerpt). The index pointer is copied
// first because it is moved into dense() and the wrapper needs it as well.
538 auto index_copy = indices_ptr;
539 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), std::move(indices_ptr), opt), std::move(index_copy), opt);
541 return populate_sparse<true>(row, std::move(ora), std::move(indices_ptr), opt);