1#ifndef TATAMI_DELAYED_SUBSET_HPP
2#define TATAMI_DELAYED_SUBSET_HPP
5#include "../utils/Index_to_container.hpp"
10#include "sanisizer/sanisizer.hpp"
25namespace DelayedSubset_internal {
/**
 * Result of format_dense_parallel_base().
 *
 * - `collapsed`: the requested subset values, sorted and with duplicates removed.
 * - `reindex`: one entry per requested position; `reindex[i]` is the position
 *   inside `collapsed` of the subset value that was requested at position `i`.
 */
template<typename Index_>
struct DenseParallelResults {
    std::vector<Index_> collapsed;
    std::vector<Index_> reindex;
};

/**
 * Sort and de-duplicate the subset values selected by `to_index`, and record
 * how to expand the de-duplicated values back to the requested order.
 *
 * @tparam Index_ Integer type used for indices.
 * @tparam SubsetStorage_ Container of subset values, read through `operator[]`.
 * @tparam ToIndex_ Callable taking an `Index_` in `[0, len)` and returning the
 * position in `subset` to read.
 *
 * @param subset Subset values.
 * @param len Number of positions to process.
 * @param to_index Maps each position `i` in `[0, len)` to a position in `subset`.
 *
 * @return A DenseParallelResults where `collapsed[reindex[i]] == subset[to_index(i)]`
 * for every `i` in `[0, len)`. Both vectors are empty when `len` is zero.
 */
template<typename Index_, class SubsetStorage_, class ToIndex_>
DenseParallelResults<Index_> format_dense_parallel_base(const SubsetStorage_& subset, Index_ len, ToIndex_ to_index) {
    // Pair each subset value with the position it was requested at, so that
    // sorting by value still lets us find the way back to the original order.
    std::vector<std::pair<Index_, Index_> > collected;
    collected.reserve(len);
    for (Index_ i = 0; i < len; ++i) {
        collected.emplace_back(subset[to_index(i)], i);
    }
    std::sort(collected.begin(), collected.end());

    DenseParallelResults<Index_> output;
    if (!collected.empty()) {
        output.collapsed.reserve(len);

        // 'reindex' is written by position below, so it must be sized up front;
        // 'collected' has exactly one entry per requested position.
        output.reindex.resize(collected.size());

        Index_ last = collected.front().first;
        output.collapsed.push_back(last);
        output.reindex[collected.front().second] = 0;

        // 'counter' is the position in 'collapsed' of the current run of equal values.
        Index_ counter = 0;
        for (Index_ i = 1; i < len; ++i) {
            const auto& pp = collected[i];
            if (pp.first != last) {
                last = pp.first;
                output.collapsed.push_back(last);
                ++counter;
            }
            output.reindex[pp.second] = counter;
        }
    }

    return output;
}
66template<
bool oracle_,
typename Value_,
typename Index_>
67class ParallelDense final :
public DenseExtractor<oracle_, Value_, Index_> {
69 template<
class SubsetStorage_>
70 ParallelDense(
const Matrix<Value_, Index_>* matrix,
const SubsetStorage_& subset,
bool row, MaybeOracle<oracle_, Index_> oracle,
const Options& opt) {
71 auto processed = format_dense_parallel_base<Index_>(subset, subset.size(), [&](Index_ i) -> Index_ { return i; });
72 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
75 template<
class SubsetStorage_>
76 ParallelDense(
const Matrix<Value_, Index_>* matrix,
const SubsetStorage_& subset,
bool row, MaybeOracle<oracle_, Index_> oracle, Index_ block_start, Index_ block_length,
const Options& opt) {
77 auto processed = format_dense_parallel_base<Index_>(subset, block_length, [&](Index_ i) -> Index_ {
return i + block_start; });
78 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
81 template<
class SubsetStorage_>
82 ParallelDense(
const Matrix<Value_, Index_>* matrix,
const SubsetStorage_& subset,
bool row, MaybeOracle<oracle_, Index_> oracle, VectorPtr<Index_> indices_ptr,
const Options& opt) {
83 const auto& indices = *indices_ptr;
84 auto processed = format_dense_parallel_base<Index_>(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; });
85 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
89 void initialize(
const Matrix<Value_, Index_>* matrix, DenseParallelResults<Index_> processed,
bool row, MaybeOracle<oracle_, Index_> oracle,
const Options& opt) {
91 my_ext = new_extractor<false, oracle_>(matrix, row, std::move(oracle), std::move(processed.collapsed), opt);
92 my_reindex.swap(processed.reindex);
96 const Value_* fetch(Index_ i, Value_* buffer) {
97 auto src = my_ext->fetch(i, my_holding_vbuffer.data());
101 for (
auto p : my_reindex) {
110 std::unique_ptr<DenseExtractor<oracle_, Value_, Index_> > my_ext;
111 std::vector<Value_> my_holding_vbuffer;
112 std::vector<Index_> my_reindex;
/**
 * Lookup table used to expand a sparse index over de-duplicated subset values
 * back into every position that requested that value.
 *
 * For a subset value `v` that was requested at least once, let
 * `x = pool_ptrs[v - offset]` and `y = pool_ptrs[v - offset + 1]`.
 * Then `pool_indices[x]` ... `pool_indices[y - 1]` are the positions that requested `v`.
 */
template<typename Index_>
struct SparseParallelReindex {
    // Start/end pointers into 'pool_indices', addressed by 'value - offset'.
    std::vector<Index_> pool_ptrs;

    // Concatenated positions, grouped by the subset value they refer to.
    std::vector<Index_> pool_indices;

    // Smallest requested subset value; subtracted before looking up 'pool_ptrs'.
    Index_ offset = 0;
};
128template<
typename Index_>
129struct SparseParallelResults {
130 std::vector<Index_> collapsed;
131 SparseParallelReindex<Index_> reindex;
134template<
typename Index_,
class SubsetStorage_,
class ToIndex_>
135SparseParallelResults<Index_> format_sparse_parallel_base(
const SubsetStorage_& indices, Index_ len, ToIndex_ to_index) {
136 std::vector<std::pair<Index_, Index_> > collected;
137 collected.reserve(len);
138 for (Index_ i = 0; i < len; ++i) {
139 auto curdex = to_index(i);
140 collected.emplace_back(indices[curdex], curdex);
142 std::sort(collected.begin(), collected.end());
144 SparseParallelResults<Index_> output;
146 if (collected.size()) {
147 output.collapsed.reserve(len);
148 output.reindex.pool_indices.reserve(len);
149 Index_ first = collected.front().first;
155 output.reindex.offset = first;
156 Index_ allocation = collected.back().first - output.reindex.offset + 1;
157 output.reindex.pool_ptrs.resize(sanisizer::sum<
decltype(output.reindex.pool_ptrs.size())>(allocation, 1));
160 output.reindex.pool_ptrs[counter] = 0;
162 output.reindex.pool_indices.push_back(collected.front().second);
163 output.reindex.pool_ptrs[counter] = 1;
164 output.collapsed.push_back(first);
167 for (Index_ i = 1; i < len; ++i) {
168 const auto& pp = collected[i];
169 auto current = pp.first;
170 if (current == last) {
171 output.reindex.pool_indices.push_back(pp.second);
172 ++(output.reindex.pool_ptrs[counter]);
176 Index_ pool_size = output.reindex.pool_indices.size();
177 counter = current - output.reindex.offset;
178 output.reindex.pool_ptrs[counter] = pool_size;
180 output.reindex.pool_indices.push_back(pp.second);
181 output.reindex.pool_ptrs[counter] = pool_size + 1;
182 output.collapsed.push_back(current);
// Sparse extractor, derived from SparseExtractor<oracle_, Value_, Index_>, that
// wraps an inner sparse extractor ('my_ext') built over the de-duplicated subset
// values and expands each inner entry into all requesting positions via 'my_reindex'.
// NOTE(review): this listing omits some source lines; e.g. no declaration of
// 'my_shift' or 'count' is visible below although both are used.
190template<
bool oracle_,
typename Value_,
typename Index_>
191class ParallelSparse final :
public SparseExtractor<oracle_, Value_, Index_> {
// Full-extent constructor: processes all 'subset.size()' entries with an identity mapping.
193 template<
class SubsetStorage_>
194 ParallelSparse(
const Matrix<Value_, Index_>* mat,
const SubsetStorage_& subset,
bool row, MaybeOracle<oracle_, Index_> oracle,
const Options& opt) {
195 auto processed = format_sparse_parallel_base<Index_>(subset, subset.size(), [](Index_ i) -> Index_ { return i; });
196 initialize(mat, std::move(processed), subset.size(), row, std::move(oracle), opt);
// Block constructor: processes 'block_length' entries, reading 'subset' at 'i + block_start'.
199 template<
class SubsetStorage_>
200 ParallelSparse(
const Matrix<Value_, Index_>* mat,
const SubsetStorage_& subset,
bool row, MaybeOracle<oracle_, Index_> oracle, Index_ block_start, Index_ block_length,
const Options& opt) {
201 auto processed = format_sparse_parallel_base<Index_>(subset, block_length, [&](Index_ i) -> Index_ {
return i + block_start; });
202 initialize(mat, std::move(processed), block_length, row, std::move(oracle), opt);
// Indexed constructor: processes 'indices.size()' entries, reading 'subset' at 'indices[i]'.
205 template<
class SubsetStorage_>
206 ParallelSparse(
const Matrix<Value_, Index_>* mat,
const SubsetStorage_& subset,
bool row, MaybeOracle<oracle_, Index_> oracle, VectorPtr<Index_> indices_ptr,
const Options& opt) {
207 const auto& indices = *indices_ptr;
208 auto processed = format_sparse_parallel_base<Index_>(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; });
209 initialize(mat, std::move(processed), indices.size(), row, std::move(oracle), opt);
// Shared tail of the constructors. 'extent' is the number of requested positions.
// 'opt' is taken by value because it is modified below before being passed on.
213 void initialize(
const Matrix<Value_, Index_>* mat, SparseParallelResults<Index_> processed, Index_ extent,
bool row, MaybeOracle<oracle_, Index_> oracle, Options opt) {
214 Index_ num_collapsed = processed.collapsed.size();
// Difference between the requested extent and the number of unique subset values;
// fetch() offsets the buffers given to the inner extractor by this amount.
215 my_shift = extent - num_collapsed;
// Remember what the caller asked for before 'opt' is altered.
217 my_needs_value = opt.sparse_extract_value;
218 my_needs_index = opt.sparse_extract_index;
219 my_needs_sort = opt.sparse_ordered_index;
// 'my_sortspace' is only used by fetch() when both sorting and values are requested.
221 if (my_needs_sort && my_needs_value) {
222 my_sortspace.reserve(extent);
// The inner extractor is always asked for indices: fetch() reads 'input.index'
// on every path to look up 'my_reindex'.
226 opt.sparse_extract_index =
true;
// NOTE(review): the body of this branch is not visible in this listing; presumably
// it sizes 'my_holding_ibuffer', which fetch() uses when indices were not requested.
227 if (!my_needs_index) {
231 my_ext = new_extractor<true, oracle_>(mat, row, std::move(oracle), std::move(processed.collapsed), opt);
232 my_reindex = std::move(processed.reindex);
// Fetches the i-th sparse vector from the inner extractor and expands each entry
// into every position listed for it in 'my_reindex'.
236 SparseRange<Value_, Index_> fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) {
// The inner extractor is pointed 'my_shift' elements into the caller's buffers, so
// the expanded output can later be written from the start of those buffers.
// NOTE(review): 'NULL' could be 'nullptr' here.
237 auto vinit = (my_needs_value ? vbuffer + my_shift : NULL);
// Indices are always needed internally; use the private buffer if the caller did not ask for them.
238 auto iinit = (my_needs_index ? ibuffer + my_shift : my_holding_ibuffer.data());
239 auto input = my_ext->fetch(i, vinit, iinit);
// Case 1: no ordering requested, so expand in place in a single pass.
241 if (!my_needs_sort) {
250 auto vcopy = vbuffer;
251 auto icopy = ibuffer;
253 auto vsrc = input.value;
// Values only need rewriting if they are wanted and not already sitting at the output position.
254 bool replace_value = my_needs_value && vsrc != vcopy;
256 for (Index_ i = 0; i < input.number; ++i) {
// [start, start + num) is the run of 'pool_indices' for this inner index.
257 auto lookup = input.index[i] - my_reindex.offset;
258 auto start = my_reindex.pool_ptrs[lookup];
259 auto num = my_reindex.pool_ptrs[lookup + 1] - start;
// Repeat the value once per requesting position.
264 std::fill_n(vcopy, num, val);
// Once the write cursor reaches the read cursor, no further rewriting is needed.
267 replace_value = (vcopy != vsrc);
270 if (my_needs_index) {
275 std::copy_n(my_reindex.pool_indices.begin() + start, num, icopy);
// Report the expanded count and point the result at the caller's buffers.
280 input.number = count;
281 if (my_needs_value) {
282 input.value = vbuffer;
284 if (my_needs_index) {
285 input.index = ibuffer;
// Case 2: ordering and values requested. Expand into (index, value) pairs, sort
// them, then write them back out to the caller's buffers.
290 }
else if (my_needs_value) {
294 my_sortspace.clear();
295 for (Index_ i = 0; i < input.number; ++i) {
296 auto val = input.value[i];
297 auto lookup = input.index[i] - my_reindex.offset;
298 auto start = my_reindex.pool_ptrs[lookup];
299 auto end = my_reindex.pool_ptrs[lookup + 1];
300 for (Index_ j = start; j < end; ++j) {
301 my_sortspace.emplace_back(my_reindex.pool_indices[j], val);
304 std::sort(my_sortspace.begin(), my_sortspace.end());
305 input.number = my_sortspace.size();
307 auto vcopy = vbuffer;
308 for (
const auto& ss : my_sortspace) {
312 input.value = vbuffer;
314 if (my_needs_index) {
315 auto icopy = ibuffer;
316 for (
const auto& ss : my_sortspace) {
320 input.index = ibuffer;
// Remaining lines: indices are expanded into 'ibuffer' and then sorted directly.
// NOTE(review): the opening of this branch is not visible in this listing;
// presumably it is the final 'else' (ordering requested, values not requested).
331 auto icopy = ibuffer;
333 for (Index_ i = 0; i < input.number; ++i) {
334 auto lookup = input.index[i] - my_reindex.offset;
335 auto start = my_reindex.pool_ptrs[lookup];
336 auto num = my_reindex.pool_ptrs[lookup + 1] - start;
339 if (my_needs_index) {
340 std::copy_n(my_reindex.pool_indices.begin() + start, num, icopy);
345 input.number = count;
346 if (my_needs_index) {
347 std::sort(ibuffer, ibuffer + count);
348 input.index = ibuffer;
// Inner extractor over the de-duplicated subset values.
358 std::unique_ptr<SparseExtractor<oracle_, Value_, Index_> > my_ext;
// What the caller requested, copied from Options in initialize().
359 bool my_needs_value, my_needs_index, my_needs_sort;
// Lookup table from inner index to requesting positions.
360 SparseParallelReindex<Index_> my_reindex;
// Scratch space for the sorted-with-values path of fetch().
361 std::vector<std::pair<Index_, Value_> > my_sortspace;
// Receives inner indices when the caller did not request indices.
362 std::vector<Index_> my_holding_ibuffer;
// Template header of the DelayedSubset class ("Delayed subsetting of a matrix with
// general indices"). NOTE(review): the class line, the constructor signature and
// the accessor signatures are not visible in this listing; only fragments follow.
382template<
typename Value_,
typename Index_,
class SubsetStorage_>
// Constructor initializer list: takes ownership of the matrix pointer and the
// subset container, and records which dimension is subsetted.
393 my_matrix(std::move(matrix)), my_subset(std::move(subset)), my_by_row(by_row)
// Checks that the subset length is representable as an Index_.
// NOTE(review): presumably sanisizer::can_cast throws on failure; confirm against sanisizer.
395 sanisizer::can_cast<Index_>(my_subset.size());
// Underlying matrix and the subset of indices applied to it.
399 std::shared_ptr<const Matrix<Value_, Index_> > my_matrix;
400 SubsetStorage_ my_subset;
// The next two returns belong to nrow(): either the subset length or the
// underlying row count. NOTE(review): the selecting condition is not visible;
// presumably it is 'my_by_row'.
406 return my_subset.size();
408 return my_matrix->nrow();
// The next two returns belong to ncol(), mirroring nrow() with the branches swapped.
414 return my_matrix->ncol();
416 return my_subset.size();
// The remaining accessors forward directly to the underlying matrix.
421 return my_matrix->is_sparse();
425 return my_matrix->is_sparse_proportion();
429 return my_matrix->prefer_rows();
433 return my_matrix->prefer_rows_proportion();
437 return my_matrix->uses_oracle(row);
// Builds a myopic dense extractor, forwarding 'args' (options, optionally preceded
// by a block or an index vector) to the chosen implementation.
// When 'row' equals 'my_by_row', a subset_utils::MyopicPerpendicularDense is built;
// otherwise a DelayedSubset_internal::ParallelDense<false, ...> is built, with
// 'false' passed in the constructor's oracle slot.
448 template<
typename ... Args_>
449 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > populate_myopic_dense(
bool row, Args_&& ... args)
const {
450 if (row == my_by_row) {
451 return std::make_unique<subset_utils::MyopicPerpendicularDense<Value_, Index_, SubsetStorage_> >(my_matrix.get(), my_subset, row, std::forward<Args_>(args)...);
453 return std::make_unique<DelayedSubset_internal::ParallelDense<false, Value_, Index_> >(my_matrix.get(), my_subset, row,
false, std::forward<Args_>(args)...);
// Myopic dense extraction over the full extent.
458 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> >
dense(
bool row,
const Options& opt)
const {
459 return populate_myopic_dense(row, opt);
// Myopic dense extraction over a contiguous block.
462 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> >
dense(
bool row, Index_ block_start, Index_ block_length,
const Options& opt)
const {
463 return populate_myopic_dense(row, block_start, block_length, opt);
// Body of the dense(row, my_subset_ptr, opt) overload (indexed extraction); its
// signature line is not visible in this listing.
467 return populate_myopic_dense(row, std::move(my_subset_ptr), opt);
// Builds a myopic sparse extractor, forwarding 'args' (options, optionally preceded
// by a block or an index vector) to the chosen implementation.
// When 'row' equals 'my_by_row', a subset_utils::MyopicPerpendicularSparse is built;
// otherwise a DelayedSubset_internal::ParallelSparse<false, ...> is built, with
// 'false' passed in the constructor's oracle slot.
474 template<
typename ... Args_>
475 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > populate_myopic_sparse(
bool row, Args_&& ... args)
const {
476 if (row == my_by_row) {
477 return std::make_unique<subset_utils::MyopicPerpendicularSparse<Value_, Index_, SubsetStorage_> >(my_matrix.get(), my_subset, row, std::forward<Args_>(args)...);
479 return std::make_unique<DelayedSubset_internal::ParallelSparse<false, Value_, Index_> >(my_matrix.get(), my_subset, row,
false, std::forward<Args_>(args)...);
// Myopic sparse extraction over the full extent.
484 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> >
sparse(
bool row,
const Options& opt)
const {
485 return populate_myopic_sparse(row, opt);
// Myopic sparse extraction over a contiguous block.
488 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> >
sparse(
bool row, Index_ block_start, Index_ block_length,
const Options& opt)
const {
489 return populate_myopic_sparse(row, block_start, block_length, opt);
// Body of the sparse(row, my_subset_ptr, opt) overload (indexed extraction); its
// signature line is not visible in this listing.
493 return populate_myopic_sparse(row, std::move(my_subset_ptr), opt);
// Builds an oracular dense extractor, handing 'oracle' and the forwarded 'args'
// (options, optionally preceded by a block or an index vector) to the chosen implementation.
// When 'row' equals 'my_by_row', a subset_utils::OracularPerpendicularDense is built;
// otherwise a DelayedSubset_internal::ParallelDense<true, ...> is built.
500 template<
typename ... Args_>
501 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > populate_oracular_dense(
bool row, std::shared_ptr<
const Oracle<Index_> > oracle, Args_&& ... args)
const {
502 if (row == my_by_row) {
503 return std::make_unique<subset_utils::OracularPerpendicularDense<Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
505 return std::make_unique<DelayedSubset_internal::ParallelDense<true, Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
// Oracular dense extraction over the full extent.
510 std::unique_ptr<OracularDenseExtractor<Value_, Index_> >
dense(
bool row, std::shared_ptr<
const Oracle<Index_> > oracle,
const Options& opt)
const {
511 return populate_oracular_dense(row, std::move(oracle), opt);
// Oracular dense extraction over a contiguous block.
514 std::unique_ptr<OracularDenseExtractor<Value_, Index_> >
dense(
bool row, std::shared_ptr<
const Oracle<Index_> > oracle, Index_ block_start, Index_ block_length,
const Options& opt)
const {
515 return populate_oracular_dense(row, std::move(oracle), block_start, block_length, opt);
// Body of the dense(row, oracle, my_subset_ptr, opt) overload (indexed extraction);
// its signature line is not visible in this listing.
519 return populate_oracular_dense(row, std::move(oracle), std::move(my_subset_ptr), opt);
// Builds an oracular sparse extractor, handing 'oracle' and the forwarded 'args'
// (options, optionally preceded by a block or an index vector) to the chosen implementation.
// When 'row' equals 'my_by_row', a subset_utils::OracularPerpendicularSparse is built;
// otherwise a DelayedSubset_internal::ParallelSparse<true, ...> is built.
526 template<
typename ... Args_>
527 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > populate_oracular_sparse(
bool row, std::shared_ptr<
const Oracle<Index_> > oracle, Args_&& ... args)
const {
528 if (row == my_by_row) {
529 return std::make_unique<subset_utils::OracularPerpendicularSparse<Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
531 return std::make_unique<DelayedSubset_internal::ParallelSparse<true, Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
// Oracular sparse extraction over the full extent.
536 std::unique_ptr<OracularSparseExtractor<Value_, Index_> >
sparse(
bool row, std::shared_ptr<
const Oracle<Index_> > oracle,
const Options& opt)
const {
537 return populate_oracular_sparse(row, std::move(oracle), opt);
// Oracular sparse extraction over a contiguous block.
540 std::unique_ptr<OracularSparseExtractor<Value_, Index_> >
sparse(
bool row, std::shared_ptr<
const Oracle<Index_> > oracle, Index_ block_start, Index_ block_length,
const Options& opt)
const {
541 return populate_oracular_sparse(row, std::move(oracle), block_start, block_length, opt);
// Body of the sparse(row, oracle, my_subset_ptr, opt) overload (indexed extraction);
// its signature line is not visible in this listing.
545 return populate_oracular_sparse(row, std::move(oracle), std::move(my_subset_ptr), opt);
Delayed subsetting of a matrix with general indices.
Definition DelayedSubset.hpp:383
bool uses_oracle(bool row) const
Definition DelayedSubset.hpp:436
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:544
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:462
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:488
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubset.hpp:536
Index_ ncol() const
Definition DelayedSubset.hpp:412
bool prefer_rows() const
Definition DelayedSubset.hpp:428
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:514
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:492
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const
Definition DelayedSubset.hpp:484
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubset.hpp:510
DelayedSubset(std::shared_ptr< const Matrix< Value_, Index_ > > matrix, SubsetStorage_ subset, bool by_row)
Definition DelayedSubset.hpp:392
double prefer_rows_proportion() const
Definition DelayedSubset.hpp:432
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:518
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, const Options &opt) const
Definition DelayedSubset.hpp:458
Index_ nrow() const
Definition DelayedSubset.hpp:404
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:540
bool is_sparse() const
Definition DelayedSubset.hpp:420
double is_sparse_proportion() const
Definition DelayedSubset.hpp:424
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:466
Virtual class for a matrix.
Definition Matrix.hpp:59
Predict future access requests on the target dimension.
Definition Oracle.hpp:29
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::shared_ptr< const std::vector< Index_ > > VectorPtr
Definition Matrix.hpp:26
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Definition Index_to_container.hpp:88
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
Definition Extractor.hpp:273
Options for accessing data from a Matrix instance.
Definition Options.hpp:30