// Member-initializer list (the constructor signature is not part of this listing).
// Stores the seed, resolves the DelayedArray and SparseArray package namespaces,
// and looks up the R functions used for dense and sparse extraction.
60 my_original_seed(seed),
61 my_delayed_env(Rcpp::Environment::namespace_env(
"DelayedArray")),
62 my_sparse_env(Rcpp::Environment::namespace_env(
"SparseArray")),
// The extractor lookups read the two environments initialized above; the members
// are declared in the same order (seed, environments, functions).
63 my_dense_extractor(my_delayed_env[
"extract_array"]),
64 my_sparse_extractor(my_sparse_env[
"extract_sparse_array"])
// Call base R's 'dim()' on the seed and validate the result.
71 auto base = Rcpp::Environment::base_env();
72 Rcpp::Function fun = base[
"dim"];
73 Rcpp::RObject output = fun(seed);
// Reject anything that is not an integer vector before wrapping it as an IntegerVector.
74 if (output.sexp_type() != INTSXP) {
75 auto ctype = get_class_name(my_original_seed);
76 throw std::runtime_error(
"'dim(<" + ctype +
">)' should return an integer vector");
// Exactly two extents are required and neither may be negative
// (R's NA_integer_ is the most negative int, so it fails this check as well).
78 Rcpp::IntegerVector dims(output);
79 if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) {
80 auto ctype = get_class_name(my_original_seed);
81 throw std::runtime_error(
"'dim(<" + ctype +
">)' should contain two non-negative integers");
// Ask DelayedArray's 'is_sparse()' about the seed; the answer must be a logical vector of length 1.
89 Rcpp::Function fun = my_delayed_env[
"is_sparse"];
90 Rcpp::LogicalVector is_sparse = fun(seed);
91 if (is_sparse.size() != 1) {
92 auto ctype = get_class_name(my_original_seed);
93 throw std::runtime_error(
"'is_sparse(<" + ctype +
">)' should return a logical vector of length 1");
// Any non-zero element is stored as true.
95 my_sparse = (is_sparse[0] != 0);
// One chunk-map entry per row and per column.
99 my_row_chunk_map.resize(my_nrow);
100 my_col_chunk_map.resize(my_ncol);
// Fetch the seed's chunk grid from DelayedArray.
102 Rcpp::Function fun = my_delayed_env[
"chunkGrid"];
103 Rcpp::RObject grid = fun(seed);
// NULL grid: every row and every column becomes its own chunk of length 1.
// The maps are therefore the identity sequence 0, 1, 2, ... and the tick
// vectors hold every boundary from 0 up to and including the extent.
105 if (grid == R_NilValue) {
106 my_row_max_chunk_size = 1;
107 my_col_max_chunk_size = 1;
108 std::iota(my_row_chunk_map.begin(), my_row_chunk_map.end(),
static_cast<Index_
>(0));
109 std::iota(my_col_chunk_map.begin(), my_col_chunk_map.end(),
static_cast<Index_
>(0));
110 my_row_chunk_ticks.resize(my_nrow + 1);
111 std::iota(my_row_chunk_ticks.begin(), my_row_chunk_ticks.end(),
static_cast<Index_
>(0));
112 my_col_chunk_ticks.resize(my_ncol + 1);
113 std::iota(my_col_chunk_ticks.begin(), my_col_chunk_ticks.end(),
static_cast<Index_
>(0));
// Without chunking information, column access is the preferred direction.
118 my_prefer_rows =
false;
// Dispatch on the class name that get_class_name() reports for the grid.
121 auto grid_cls = get_class_name(grid);
// Regular grid: the 'spacings' slot supplies one chunk spacing per dimension.
123 if (grid_cls ==
"RegularArrayGrid") {
124 Rcpp::IntegerVector spacings(Rcpp::RObject(grid.slot(
"spacings")));
// Validate the sign as well as the length, as the error message promises.
// A negative spacing (including NA_integer_) would otherwise be used as a
// chunk length in the arithmetic that follows. The length test comes first,
// so the element accesses are only evaluated for a length-2 vector.
125 if (spacings.size() != 2 || spacings[0] < 0 || spacings[1] < 0) {
126 auto ctype = get_class_name(seed);
127 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@spacings' should be an integer vector of length 2 with non-negative values");
// Helper for a regular grid: appends the chunk boundaries to 'ticks' and writes
// the containing chunk's index into 'map' for every position along the dimension.
// NOTE(review): the declaration and the update of 'start' are on lines not shown in this listing.
130 auto populate = [](Index_ extent, Index_ spacing, std::vector<Index_>& map, std::vector<Index_>& ticks) {
// Room for ceil(extent / spacing) chunk starts plus the closing boundary.
134 ticks.reserve((extent / spacing) + (extent % spacing > 0) + 1);
136 ticks.push_back(start);
137 while (start != extent) {
// The final chunk may be shorter than 'spacing'.
138 auto to_fill = std::min(spacing, extent - start);
// 'ticks.size() - 1' is the index of the chunk being filled (assuming 'ticks' was empty on entry).
139 std::fill_n(map.begin() + start, to_fill, ticks.size() - 1);
141 ticks.push_back(start);
// The spacing along each dimension is also used as that dimension's maximum chunk length.
146 my_row_max_chunk_size = spacings[0];
147 populate(my_nrow, my_row_max_chunk_size, my_row_chunk_map, my_row_chunk_ticks);
148 my_col_max_chunk_size = spacings[1];
149 populate(my_ncol, my_col_max_chunk_size, my_col_chunk_map, my_col_chunk_ticks);
// Arbitrary grid: the 'tickmarks' slot must be a list with one entry per dimension.
151 }
else if (grid_cls ==
"ArbitraryArrayGrid") {
152 Rcpp::List ticks(Rcpp::RObject(grid.slot(
"tickmarks")));
153 if (ticks.size() != 2) {
154 auto ctype = get_class_name(seed);
155 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@tickmarks' should return a list of length 2");
// Helper for an arbitrary grid: validates the ticks supplied by R, copies them
// into 'new_ticks', fills 'map' with a per-position chunk number and records
// the largest chunk length in 'max_chunk_size'.
158 auto populate = [](Index_ extent,
const Rcpp::IntegerVector& ticks, std::vector<Index_>& map, std::vector<Index_>& new_ticks, Index_& max_chunk_size) {
// There must be at least one tick and the last one must equal the dimension extent.
159 if (ticks.size() == 0 || ticks[ticks.size() - 1] !=
static_cast<int>(extent)) {
160 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
// The copy lands at offset 1, so element 0 of 'new_ticks' is not written by it.
162 new_ticks.resize(ticks.size() + 1);
163 std::copy(ticks.begin(), ticks.end(), new_ticks.begin() + 1);
// NOTE(review): the declarations/updates of 'start' and 'counter', and the condition
// guarding the throw below, are on lines not shown in this listing.
170 for (
auto t : ticks) {
172 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
// Length of the chunk that ends at tick 't'.
174 Index_ to_fill = t - start;
175 if (to_fill > max_chunk_size) {
176 max_chunk_size = to_fill;
178 std::fill_n(map.begin() + start, to_fill, counter);
// The first list element is applied to the rows, the second to the columns.
184 Rcpp::IntegerVector first(ticks[0]);
185 populate(my_nrow, first, my_row_chunk_map, my_row_chunk_ticks, my_row_max_chunk_size);
186 Rcpp::IntegerVector second(ticks[1]);
187 populate(my_ncol, second, my_col_chunk_map, my_col_chunk_ticks, my_col_max_chunk_size);
// Any other grid class is rejected, naming both the grid class and the seed class.
// The trailing quote closes the one opened before 'chunkGrid', so the quoted
// expression in the message is balanced.
190 auto ctype = get_class_name(seed);
191 throw std::runtime_error(
"instance of unknown class '" + grid_cls +
"' returned by 'chunkGrid(<" + ctype +
">)'");
// A row spans one chunk per interval of the column ticks, and a column spans one
// chunk per interval of the row ticks. Row access is preferred when a row touches
// no more chunks than a column does.
195 auto chunks_per_row = my_col_chunk_ticks.size() - 1;
196 auto chunks_per_col = my_row_chunk_ticks.size() - 1;
197 my_prefer_rows = chunks_per_row <= chunks_per_col;
// A cache size equal to size_t(-1) is treated as "not specified"; in that case
// the value comes from DelayedArray's 'getAutoBlockSize()'.
203 if (my_cache_size_in_bytes ==
static_cast<size_t>(-1)) {
204 Rcpp::Function fun = my_delayed_env[
"getAutoBlockSize"];
205 Rcpp::NumericVector bsize = fun();
// The result must be a single number that is not negative.
206 if (bsize.size() != 1 || bsize[0] < 0) {
207 throw std::runtime_error(
"'getAutoBlockSize()' should return a non-negative number of bytes");
// Implicit conversion from double to size_t.
209 my_cache_size_in_bytes = bsize[0];
// Number of rows and columns; these size the chunk maps in the constructor.
222 Index_ my_nrow, my_ncol;
// Sparsity flag taken from 'is_sparse()' and the preferred access direction.
223 bool my_sparse, my_prefer_rows;
// For each row/column position, the index of the chunk that contains it.
225 std::vector<Index_> my_row_chunk_map, my_col_chunk_map;
// Chunk boundaries along each dimension.
226 std::vector<Index_> my_row_chunk_ticks, my_col_chunk_ticks;
// Largest chunk length along each dimension.
236 Index_ my_row_max_chunk_size, my_col_max_chunk_size;
// Cache budget in bytes and the minimum-cache flag; both are passed to tatami_chunked::SlabCacheStats.
238 size_t my_cache_size_in_bytes;
239 bool my_require_minimum_cache;
// The seed object, then the R environments and the extraction functions looked up in them.
241 Rcpp::RObject my_original_seed;
242 Rcpp::Environment my_delayed_env, my_sparse_env;
243 Rcpp::Function my_dense_extractor, my_sparse_extractor;
246 Index_ nrow()
const {
250 Index_ ncol()
const {
254 bool is_sparse()
const {
// Returns the sparsity flag converted to double: 1.0 when my_sparse is true, 0.0 otherwise.
258 double is_sparse_proportion()
const {
259 return static_cast<double>(my_sparse);
// Returns the access-direction preference computed in the constructor.
262 bool prefer_rows()
const {
263 return my_prefer_rows;
// Returns the row preference converted to double: 1.0 when rows are preferred, 0.0 otherwise.
266 double prefer_rows_proportion()
const {
267 return static_cast<double>(my_prefer_rows);
270 bool uses_oracle(
bool)
const {
// Returns the maximum chunk length along the rows when 'row' is true, otherwise along the columns.
275 Index_ max_primary_chunk_length(
bool row)
const {
276 return (row ? my_row_max_chunk_size : my_col_max_chunk_size);
// Returns the extent of the other dimension: the column count when 'row' is true, the row count otherwise.
279 Index_ secondary_dim(
bool row)
const {
280 return (row ? my_ncol : my_nrow);
// Returns a reference to the row or the column chunk boundaries.
// NOTE(review): the branch choosing between the two returns is on lines not shown
// in this listing; presumably it tests 'row'.
283 const std::vector<Index_>& chunk_ticks(
bool row)
const {
285 return my_row_chunk_ticks;
287 return my_col_chunk_ticks;
// Returns a reference to the row or the column position-to-chunk map.
// NOTE(review): the branch choosing between the two returns is on lines not shown
// in this listing; presumably it tests 'row'.
291 const std::vector<Index_>& chunk_map(
bool row)
const {
293 return my_row_chunk_map;
295 return my_col_chunk_map;
// Builds a dense extractor over the seed. FromDense_ and FromSparse_ are
// template-template parameters naming the extractor classes to instantiate.
// NOTE(review): parts of the template header and the conditions selecting among
// the four constructions below are on lines not shown in this listing.
305 template <
bool,
bool,
typename,
typename,
typename>
class FromDense_,
306 template <
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
309 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(
bool row, Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> oracle, Args_&& ... args)
const {
310 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;
// Cache statistics are derived from the largest chunk length along the target
// dimension, the non-target length, the byte budget and the cached element size.
312 Index_ max_target_chunk_length = max_primary_chunk_length(row);
313 tatami_chunked::SlabCacheStats stats(max_target_chunk_length, non_target_length, my_cache_size_in_bytes,
sizeof(CachedValue_), my_require_minimum_cache);
315 const auto& map = chunk_map(row);
316 const auto& ticks = chunk_ticks(row);
// 'solo' is set when the statistics report that no slab fits in the cache.
317 bool solo = (stats.max_slabs_in_cache == 0);
// With TATAMI_R_PARALLELIZE_UNKNOWN defined, the construction is placed in a
// callable handed to executor().run(); the lambda captures everything by reference.
319#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
321 auto& mexec = executor();
322 mexec.run([&]() ->
void {
// Dense-backed extractors; the first template argument is true in one variant and false in the other
// (presumably following 'solo').
327 typedef FromDense_<true, oracle_, Value_, Index_, CachedValue_> ShortDense;
328 output.reset(
new ShortDense(my_original_seed, my_dense_extractor, row, std::move(oracle), std::forward<Args_>(args)..., ticks, map, stats));
330 typedef FromDense_<false, oracle_, Value_, Index_, CachedValue_> ShortDense;
331 output.reset(
new ShortDense(my_original_seed, my_dense_extractor, row, std::move(oracle), std::forward<Args_>(args)..., ticks, map, stats));
// Sparse-backed extractors use the sparse extraction function and additionally receive the maximum chunk length.
335 typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
336 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
338 typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
339 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
343#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// Three populate_dense overloads (their signature lines are not part of this listing).
// Each forwards to populate_dense_internal with the matching pair of extractor classes.
// Full extent: the non-target length is the whole secondary dimension.
350 template<
bool oracle_>
352 Index_ non_target_dim = secondary_dim(row);
353 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseFull, UnknownMatrix_internal::DensifiedSparseFull>(row, non_target_dim, std::move(ora), non_target_dim);
// Contiguous block: the non-target length is the block length.
356 template<
bool oracle_>
358 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseBlock, UnknownMatrix_internal::DensifiedSparseBlock>(row, block_length, std::move(ora), block_start, block_length);
// Indexed subset: the size is read before the pointer is moved into the call.
361 template<
bool oracle_>
363 Index_ nidx = indices_ptr->size();
364 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseIndexed, UnknownMatrix_internal::DensifiedSparseIndexed>(row, nidx, std::move(ora), std::move(indices_ptr));
// Dense extractor entry points. The myopic overloads pass 'false' in the oracle
// position of populate_dense<false>; the oracular overloads move the oracle into
// populate_dense<true>. Some signature lines are not part of this listing.
// Myopic, full extent.
368 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
369 return populate_dense<false>(row,
false, opt);
// Myopic, contiguous block.
372 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
373 return populate_dense<false>(row,
false, block_start, block_length, opt);
// Myopic, indexed subset.
377 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Oracular, full extent.
385 return populate_dense<true>(row, std::move(ora), opt);
// Oracular, contiguous block.
388 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > ora, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
389 return populate_dense<true>(row, std::move(ora), block_start, block_length, opt);
// Oracular, indexed subset.
393 return populate_dense<true>(row, std::move(ora), std::move(indices_ptr), opt);
// Builds a sparse extractor over the seed using the FromSparse_ class template.
// NOTE(review): parts of the template header, most of the parameter list, two of the
// SlabCacheStats arguments and the condition selecting between the two constructions
// are on lines not shown in this listing.
402 template<
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
405 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
407 Index_ non_target_length,
412 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
// Cache statistics start from the largest chunk length along the target dimension.
414 Index_ max_target_chunk_length = max_primary_chunk_length(row);
415 tatami_chunked::SlabCacheStats stats(
416 max_target_chunk_length,
418 my_cache_size_in_bytes,
420 my_require_minimum_cache
423 const auto& map = chunk_map(row);
424 const auto& ticks = chunk_ticks(row);
// 'solo' is set when the statistics report that no slab fits in the cache.
427 bool solo = stats.max_slabs_in_cache == 0;
// With TATAMI_R_PARALLELIZE_UNKNOWN defined, the construction is placed in a
// callable handed to executor().run(); the lambda captures everything by reference.
429#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
431 auto& mexec = executor();
432 mexec.run([&]() ->
void {
// The two variants differ only in the first template argument (presumably following 'solo').
436 typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
437 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
439 typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
440 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
443#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// Three populate_sparse overloads (their signature lines are not part of this listing).
// Each forwards to populate_sparse_internal with the matching extractor class and passes 'opt' along.
// Full extent: the non-target length is the whole secondary dimension.
450 template<
bool oracle_>
452 Index_ non_target_dim = secondary_dim(row);
453 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseFull>(row, non_target_dim, std::move(ora), opt, non_target_dim);
// Contiguous block: the non-target length is the block length.
456 template<
bool oracle_>
458 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseBlock>(row, block_length, std::move(ora), opt, block_start, block_length);
// Indexed subset: the size is read before the pointer is moved into the call.
461 template<
bool oracle_>
463 Index_ nidx = indices_ptr->size();
464 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseIndexed>(row, nidx, std::move(ora), opt, std::move(indices_ptr));
// Sparse extractor entry points. Each overload shows two returns: the first wraps
// the corresponding dense extractor in a tatami *SparsifiedWrapper, the second
// builds a sparse extractor through populate_sparse.
// NOTE(review): the condition choosing between the two returns is on lines not shown
// in this listing (presumably the seed's sparsity flag), as are some signature lines.
// Myopic, full extent.
468 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
470 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), secondary_dim(row), opt);
472 return populate_sparse<false>(row,
false, opt);
// Myopic, contiguous block.
476 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
478 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
480 return populate_sparse<false>(row,
false, block_start, block_length, opt);
// Myopic, indexed subset. The pointer is copied first because the original is
// moved into dense() while the wrapper needs the indices too.
486 auto index_copy = indices_ptr;
487 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(dense(row, std::move(indices_ptr), opt), std::move(index_copy), opt);
489 return populate_sparse<false>(row,
false, std::move(indices_ptr), opt);
// Oracular, full extent.
499 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), opt), secondary_dim(row), opt);
501 return populate_sparse<true>(row, std::move(ora), opt);
// Oracular, contiguous block.
505 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > ora, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
507 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), block_start, block_length, opt), block_start, block_length, opt);
509 return populate_sparse<true>(row, std::move(ora), block_start, block_length, opt);
// Oracular, indexed subset; same pointer copy as in the myopic indexed case.
515 auto index_copy = indices_ptr;
516 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), std::move(indices_ptr), opt), std::move(index_copy), opt);
518 return populate_sparse<true>(row, std::move(ora), std::move(indices_ptr), opt);