58 my_original_seed(seed),
59 my_delayed_env(Rcpp::Environment::namespace_env(
"DelayedArray")),
60 my_sparse_env(Rcpp::Environment::namespace_env(
"SparseArray")),
61 my_dense_extractor(my_delayed_env[
"extract_array"]),
62 my_sparse_extractor(my_sparse_env[
"extract_sparse_array"])
69 auto base = Rcpp::Environment::base_env();
70 Rcpp::Function fun = base[
"dim"];
71 Rcpp::RObject output = fun(seed);
72 if (output.sexp_type() != INTSXP) {
73 auto ctype = get_class_name(my_original_seed);
74 throw std::runtime_error(
"'dim(<" + ctype +
">)' should return an integer vector");
76 Rcpp::IntegerVector dims(output);
77 if (dims.size() != 2 || dims[0] < 0 || dims[1] < 0) {
78 auto ctype = get_class_name(my_original_seed);
79 throw std::runtime_error(
"'dim(<" + ctype +
">)' should contain two non-negative integers");
87 Rcpp::Function fun = my_delayed_env[
"is_sparse"];
88 Rcpp::LogicalVector is_sparse = fun(seed);
89 if (is_sparse.size() != 1) {
90 auto ctype = get_class_name(my_original_seed);
91 throw std::runtime_error(
"'is_sparse(<" + ctype +
">)' should return a logical vector of length 1");
93 my_sparse = (is_sparse[0] != 0);
97 my_row_chunk_map.resize(my_nrow);
98 my_col_chunk_map.resize(my_ncol);
100 Rcpp::Function fun = my_delayed_env[
"chunkGrid"];
101 Rcpp::RObject grid = fun(seed);
103 if (grid == R_NilValue) {
104 my_row_max_chunk_size = 1;
105 my_col_max_chunk_size = 1;
106 std::iota(my_row_chunk_map.begin(), my_row_chunk_map.end(),
static_cast<Index_
>(0));
107 std::iota(my_col_chunk_map.begin(), my_col_chunk_map.end(),
static_cast<Index_
>(0));
108 my_row_chunk_ticks.resize(my_nrow + 1);
109 std::iota(my_row_chunk_ticks.begin(), my_row_chunk_ticks.end(),
static_cast<Index_
>(0));
110 my_col_chunk_ticks.resize(my_ncol + 1);
111 std::iota(my_col_chunk_ticks.begin(), my_col_chunk_ticks.end(),
static_cast<Index_
>(0));
116 my_prefer_rows =
false;
119 auto grid_cls = get_class_name(grid);
121 if (grid_cls ==
"RegularArrayGrid") {
122 Rcpp::IntegerVector spacings(Rcpp::RObject(grid.slot(
"spacings")));
123 if (spacings.size() != 2) {
124 auto ctype = get_class_name(seed);
125 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@spacings' should be an integer vector of length 2 with non-negative values");
128 auto populate = [](Index_ extent, Index_ spacing, std::vector<Index_>& map, std::vector<Index_>& ticks) {
132 ticks.reserve((extent / spacing) + (extent % spacing > 0) + 1);
134 ticks.push_back(start);
135 while (start != extent) {
136 auto to_fill = std::min(spacing, extent - start);
137 std::fill_n(map.begin() + start, to_fill, ticks.size() - 1);
139 ticks.push_back(start);
144 my_row_max_chunk_size = spacings[0];
145 populate(my_nrow, my_row_max_chunk_size, my_row_chunk_map, my_row_chunk_ticks);
146 my_col_max_chunk_size = spacings[1];
147 populate(my_ncol, my_col_max_chunk_size, my_col_chunk_map, my_col_chunk_ticks);
149 }
else if (grid_cls ==
"ArbitraryArrayGrid") {
150 Rcpp::List ticks(Rcpp::RObject(grid.slot(
"tickmarks")));
151 if (ticks.size() != 2) {
152 auto ctype = get_class_name(seed);
153 throw std::runtime_error(
"'chunkGrid(<" + ctype +
">)@tickmarks' should return a list of length 2");
156 auto populate = [](Index_ extent,
const Rcpp::IntegerVector& ticks, std::vector<Index_>& map, std::vector<Index_>& new_ticks, Index_& max_chunk_size) {
157 if (ticks.size() == 0 || ticks[ticks.size() - 1] !=
static_cast<int>(extent)) {
158 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
160 new_ticks.resize(ticks.size() + 1);
161 std::copy(ticks.begin(), ticks.end(), new_ticks.begin() + 1);
168 for (
auto t : ticks) {
170 throw std::runtime_error(
"invalid ticks returned by 'chunkGrid'");
172 Index_ to_fill = t - start;
173 if (to_fill > max_chunk_size) {
174 max_chunk_size = to_fill;
176 std::fill_n(map.begin() + start, to_fill, counter);
182 Rcpp::IntegerVector first(ticks[0]);
183 populate(my_nrow, first, my_row_chunk_map, my_row_chunk_ticks, my_row_max_chunk_size);
184 Rcpp::IntegerVector second(ticks[1]);
185 populate(my_ncol, second, my_col_chunk_map, my_col_chunk_ticks, my_col_max_chunk_size);
188 auto ctype = get_class_name(seed);
189 throw std::runtime_error(
"instance of unknown class '" + grid_cls +
"' returned by 'chunkGrid(<" + ctype +
">)");
193 auto chunks_per_row = my_col_chunk_ticks.size() - 1;
194 auto chunks_per_col = my_row_chunk_ticks.size() - 1;
195 my_prefer_rows = chunks_per_row <= chunks_per_col;
201 if (my_cache_size_in_bytes ==
static_cast<size_t>(-1)) {
202 Rcpp::Function fun = my_delayed_env[
"getAutoBlockSize"];
203 Rcpp::NumericVector bsize = fun();
204 if (bsize.size() != 1 || bsize[0] < 0) {
205 throw std::runtime_error(
"'getAutoBlockSize()' should return a non-negative number of bytes");
207 my_cache_size_in_bytes = bsize[0];
// Matrix extents; the chunk maps are resized to these values in the constructor.
// NOTE(review): the statements that assign them are not visible in this excerpt; presumably they
// are taken from the validated dim() result - confirm.
220 Index_ my_nrow, my_ncol;
// my_sparse is set from the length-1 logical returned by is_sparse(seed).
// my_prefer_rows has two assignments in the constructor: false in the branch taken when
// chunkGrid() returns NULL, and (chunks_per_row <= chunks_per_col) computed from the tick vector sizes.
221 bool my_sparse, my_prefer_rows;
// For every row / column index, the index of the chunk that contains it (length my_nrow / my_ncol).
223 std::vector<Index_> my_row_chunk_map, my_col_chunk_map;
// Chunk boundaries along each dimension; the number of chunks is the vector size minus one.
224 std::vector<Index_> my_row_chunk_ticks, my_col_chunk_ticks;
// Largest chunk length per dimension: 1 when there is no grid, the grid spacing for a
// RegularArrayGrid, and the running maximum tick gap for an ArbitraryArrayGrid.
234 Index_ my_row_max_chunk_size, my_col_max_chunk_size;
// Cache budget in bytes. A value of static_cast<size_t>(-1) makes the constructor replace it
// with the result of getAutoBlockSize().
236 size_t my_cache_size_in_bytes;
// Forwarded unchanged as the last argument of tatami_chunked::SlabCacheStats.
237 bool my_require_minimum_cache;
// The R object this matrix wraps, as passed to the constructor.
239 Rcpp::RObject my_original_seed;
// Namespaces of the DelayedArray and SparseArray packages.
// Declaration order matters: members are initialized in this order, and the extractor functions
// on the next line are looked up in these environments.
240 Rcpp::Environment my_delayed_env, my_sparse_env;
// extract_array from my_delayed_env and extract_sparse_array from my_sparse_env.
241 Rcpp::Function my_dense_extractor, my_sparse_extractor;
// Row-count accessor.
// NOTE(review): the body is not visible in this excerpt; presumably it returns my_nrow - confirm.
244 Index_ nrow()
const {
// Column-count accessor.
// NOTE(review): the body is not visible in this excerpt; presumably it returns my_ncol - confirm.
248 Index_ ncol()
const {
// Sparsity query.
// NOTE(review): the body is not visible in this excerpt; presumably it returns my_sparse - confirm.
252 bool is_sparse()
const {
// Returns my_sparse converted to double, i.e. 1.0 when the flag is set and 0.0 otherwise.
256 double is_sparse_proportion()
const {
257 return static_cast<double>(my_sparse);
// Returns the my_prefer_rows flag computed in the constructor.
260 bool prefer_rows()
const {
261 return my_prefer_rows;
// Returns my_prefer_rows converted to double, i.e. 1.0 when the flag is set and 0.0 otherwise.
264 double prefer_rows_proportion()
const {
265 return static_cast<double>(my_prefer_rows);
// Oracle-usage query. The bool parameter is unnamed, so the answer cannot depend on it.
// NOTE(review): the returned value is not visible in this excerpt - confirm against the full file.
268 bool uses_oracle(
bool)
const {
// Returns the largest chunk length along the requested dimension:
// my_row_max_chunk_size when row is true, my_col_max_chunk_size otherwise.
273 Index_ max_primary_chunk_length(
bool row)
const {
274 return (row ? my_row_max_chunk_size : my_col_max_chunk_size);
// Returns the extent of the other dimension: my_ncol when row is true, my_nrow otherwise.
277 Index_ secondary_dim(
bool row)
const {
278 return (row ? my_ncol : my_nrow);
// Returns a const reference to one of the two chunk boundary vectors.
// NOTE(review): the branch condition is not visible in this excerpt; presumably row == true
// selects my_row_chunk_ticks and row == false selects my_col_chunk_ticks - confirm.
281 const std::vector<Index_>& chunk_ticks(
bool row)
const {
283 return my_row_chunk_ticks;
285 return my_col_chunk_ticks;
// Returns a const reference to one of the two index-to-chunk lookup vectors.
// NOTE(review): the branch condition is not visible in this excerpt; presumably row == true
// selects my_row_chunk_map and row == false selects my_col_chunk_map - confirm.
289 const std::vector<Index_>& chunk_map(
bool row)
const {
291 return my_row_chunk_map;
293 return my_col_chunk_map;
// populate_dense_internal: shared factory behind the dense extractors.
// FromDense_ and FromSparse_ are the extractor class templates to instantiate. Both are
// instantiated below with a leading true or false followed by oracle_.
// NOTE(review): the opening template line (which declares oracle_) and the Args_ pack
// declaration are not visible in this excerpt.
// Parameters:
//   row               - forwarded to the extractor; also selects the chunk map, the chunk ticks
//                       and the maximum chunk length for the target dimension.
//   non_target_length - second argument of the SlabCacheStats constructor.
//   oracle            - moved into the extractor constructor.
//   args              - perfectly forwarded to the extractor constructor.
// The result is held in a unique_ptr to tatami::DenseExtractor.
// NOTE(review): the return statement is not visible in this excerpt.
303 template <
bool,
bool,
typename,
typename,
typename>
class FromDense_,
304 template <
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
307 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(
bool row, Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> oracle, Args_&& ... args)
const {
308 std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;
310 Index_ max_target_chunk_length = max_primary_chunk_length(row);
// Cache statistics built from the slab dimensions, the byte budget and the size of one cached value.
311 tatami_chunked::SlabCacheStats stats(max_target_chunk_length, non_target_length, my_cache_size_in_bytes,
sizeof(CachedValue_), my_require_minimum_cache);
313 const auto& map = chunk_map(row);
314 const auto& ticks = chunk_ticks(row);
// solo is true when the statistics report that zero slabs fit in the cache.
315 bool solo = (stats.max_slabs_in_cache == 0);
// When TATAMI_R_PARALLELIZE_UNKNOWN is defined, the construction below runs inside mexec.run(...).
// NOTE(review): mexec is defined outside this excerpt; presumably it dispatches the lambda to
// the thread that is allowed to call into R - confirm.
317#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
320 mexec.run([&]() ->
void {
// Dense path: extractors built around my_dense_extractor, once with a leading true and once
// with a leading false template argument.
// NOTE(review): the conditions choosing between the four constructions are not visible here;
// presumably my_sparse picks dense versus sparse and solo picks true versus false - confirm.
325 typedef FromDense_<true, oracle_, Value_, Index_, CachedValue_> ShortDense;
326 output.reset(
new ShortDense(my_original_seed, my_dense_extractor, row, std::move(oracle), std::forward<Args_>(args)..., ticks, map, stats));
328 typedef FromDense_<false, oracle_, Value_, Index_, CachedValue_> ShortDense;
329 output.reset(
new ShortDense(my_original_seed, my_dense_extractor, row, std::move(oracle), std::forward<Args_>(args)..., ticks, map, stats));
// Sparse path: extractors built around my_sparse_extractor; unlike the dense path these also
// receive max_target_chunk_length and are parameterized on CachedIndex_.
333 typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
334 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
336 typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
337 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
341#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// populate_dense overload set. Each overload forwards to populate_dense_internal with a matching
// pair of extractor templates from UnknownMatrix_internal (a dense one and a densified-sparse one).
// NOTE(review): the function signatures are not visible in this excerpt; the parameter names
// row, ora, block_start, block_length and indices_ptr are taken from the visible call sites.
348 template<
bool oracle_>
// Full extent: the non-target length is the whole other dimension, passed both as the cache
// sizing length and as the extra constructor argument.
350 Index_ non_target_dim = secondary_dim(row);
351 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseFull, UnknownMatrix_internal::DensifiedSparseFull>(row, non_target_dim, std::move(ora), non_target_dim);
354 template<
bool oracle_>
// Contiguous block: block_length sizes the cache; block_start and block_length go to the extractor.
356 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseBlock, UnknownMatrix_internal::DensifiedSparseBlock>(row, block_length, std::move(ora), block_start, block_length);
359 template<
bool oracle_>
// Index subset: the size is read before indices_ptr is moved into the call.
361 Index_ nidx = indices_ptr->size();
362 return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseIndexed, UnknownMatrix_internal::DensifiedSparseIndexed>(row, nidx, std::move(ora), std::move(indices_ptr));
// dense() overload set. The myopic overloads call populate_dense<false> and pass false in the
// argument position where the oracular overloads pass the oracle; the oracular overloads call
// populate_dense<true> and move the oracle in.
// NOTE(review): the signatures of the indexed overloads and of the oracular full-extent overload
// are not visible in this excerpt.

// Myopic, full extent.
366 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row,
const tatami::Options& opt)
const {
367 return populate_dense<false>(row,
false, opt);
// Myopic, contiguous block starting at block_start with block_length elements.
370 std::unique_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > dense(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
371 return populate_dense<false>(row,
false, block_start, block_length, opt);
// Myopic, index subset.
375 return populate_dense<false>(row,
false, std::move(indices_ptr), opt);
// Oracular, full extent.
383 return populate_dense<true>(row, std::move(ora), opt);
// Oracular, contiguous block.
386 std::unique_ptr<tatami::OracularDenseExtractor<Value_, Index_> > dense(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > ora, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
387 return populate_dense<true>(row, std::move(ora), block_start, block_length, opt);
// Oracular, index subset.
391 return populate_dense<true>(row, std::move(ora), std::move(indices_ptr), opt);
// populate_sparse_internal: shared factory behind the sparse extractors.
// FromSparse_ is the extractor class template to instantiate; it is instantiated below with a
// leading true or false followed by oracle_.
// NOTE(review): several lines are not visible in this excerpt - the opening template line, most
// of the parameter list, the remaining SlabCacheStats arguments, the definitions of needs_value
// and needs_index, the branch condition and the return statement.
// The result is held in a unique_ptr to tatami::SparseExtractor.
400 template<
bool,
bool,
typename,
typename,
typename,
typename>
class FromSparse_,
403 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
405 Index_ non_target_length,
410 std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
412 Index_ max_target_chunk_length = max_primary_chunk_length(row);
// Cache statistics; the visible arguments are the maximum chunk length, the byte budget and the
// minimum-cache flag.
413 tatami_chunked::SlabCacheStats stats(
414 max_target_chunk_length,
416 my_cache_size_in_bytes,
418 my_require_minimum_cache
421 const auto& map = chunk_map(row);
422 const auto& ticks = chunk_ticks(row);
// solo is true when the statistics report that zero slabs fit in the cache.
425 bool solo = stats.max_slabs_in_cache == 0;
// When TATAMI_R_PARALLELIZE_UNKNOWN is defined, the construction below runs inside mexec.run(...).
// NOTE(review): mexec is defined outside this excerpt - confirm its threading role.
427#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
430 mexec.run([&]() ->
void {
// Two constructions that differ only in the leading template argument.
// NOTE(review): presumably solo selects between them - the condition is not visible here.
434 typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
435 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
437 typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
438 output.reset(
new ShortSparse(my_original_seed, my_sparse_extractor, row, std::move(oracle), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
441#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// populate_sparse overload set. Each overload forwards to populate_sparse_internal with the
// matching extractor template from UnknownMatrix_internal, passing opt right after the oracle.
// NOTE(review): the function signatures are not visible in this excerpt; the parameter names
// row, ora, opt, block_start, block_length and indices_ptr are taken from the visible call sites.
448 template<
bool oracle_>
// Full extent: the non-target length is the whole other dimension.
450 Index_ non_target_dim = secondary_dim(row);
451 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseFull>(row, non_target_dim, std::move(ora), opt, non_target_dim);
454 template<
bool oracle_>
// Contiguous block: block_length sizes the cache; block_start and block_length go to the extractor.
456 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseBlock>(row, block_length, std::move(ora), opt, block_start, block_length);
459 template<
bool oracle_>
// Index subset: the size is read before indices_ptr is moved into the call.
461 Index_ nidx = indices_ptr->size();
462 return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseIndexed>(row, nidx, std::move(ora), opt, std::move(indices_ptr));
// sparse() overload set. Every overload has two return paths: wrap the corresponding dense()
// extractor in a tatami sparsified wrapper, or build a sparse extractor through populate_sparse.
// NOTE(review): the condition choosing between the two paths is not visible in this excerpt;
// presumably the wrapper is used when my_sparse is false - confirm. The signatures of the indexed
// overloads and of the oracular full-extent overload are also not visible.

// Myopic, full extent.
466 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row,
const tatami::Options& opt)
const {
468 return std::make_unique<tatami::FullSparsifiedWrapper<false, Value_, Index_> >(dense(row, opt), secondary_dim(row), opt);
470 return populate_sparse<false>(row,
false, opt);
// Myopic, contiguous block.
474 std::unique_ptr<tatami::MyopicSparseExtractor<Value_, Index_> > sparse(
bool row, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
476 return std::make_unique<tatami::BlockSparsifiedWrapper<false, Value_, Index_> >(dense(row, block_start, block_length, opt), block_start, block_length, opt);
478 return populate_sparse<false>(row,
false, block_start, block_length, opt);
// Myopic, index subset. The pointer is copied first because the same call expression both moves
// indices_ptr into dense() and hands the index list to the wrapper.
484 auto index_copy = indices_ptr;
485 return std::make_unique<tatami::IndexSparsifiedWrapper<false, Value_, Index_> >(dense(row, std::move(indices_ptr), opt), std::move(index_copy), opt);
487 return populate_sparse<false>(row,
false, std::move(indices_ptr), opt);
// Oracular, full extent.
497 return std::make_unique<tatami::FullSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), opt), secondary_dim(row), opt);
499 return populate_sparse<true>(row, std::move(ora), opt);
// Oracular, contiguous block.
503 std::unique_ptr<tatami::OracularSparseExtractor<Value_, Index_> > sparse(
bool row, std::shared_ptr<
const tatami::Oracle<Index_> > ora, Index_ block_start, Index_ block_length,
const tatami::Options& opt)
const {
505 return std::make_unique<tatami::BlockSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), block_start, block_length, opt), block_start, block_length, opt);
507 return populate_sparse<true>(row, std::move(ora), block_start, block_length, opt);
513 auto index_copy = indices_ptr;
514 return std::make_unique<tatami::IndexSparsifiedWrapper<true, Value_, Index_> >(dense(row, std::move(ora), std::move(indices_ptr), opt), std::move(index_copy), opt);
516 return populate_sparse<true>(row, std::move(ora), std::move(indices_ptr), opt);