1#ifndef TATAMI_DELAYED_SUBSET_HPP
2#define TATAMI_DELAYED_SUBSET_HPP
5#include "../utils/Index_to_container.hpp"
10#include "sanisizer/sanisizer.hpp"
25namespace DelayedSubset_internal {
27template<
typename Index_>
28struct DenseParallelResults {
29 std::vector<Index_> collapsed;
30 std::vector<Index_> reindex;
33template<
typename Index_,
class SubsetStorage_,
class ToIndex_>
34DenseParallelResults<Index_> format_dense_parallel_base(
const SubsetStorage_& subset,
const Index_ len,
const ToIndex_ to_index) {
35 std::vector<std::pair<Index_, Index_> > collected;
36 collected.reserve(len);
37 for (Index_ i = 0; i < len; ++i) {
38 collected.emplace_back(subset[to_index(i)], i);
40 std::sort(collected.begin(), collected.end());
42 DenseParallelResults<Index_> output;
43 if (collected.size()) {
44 output.collapsed.reserve(len);
47 Index_ last = collected.front().first;
48 output.collapsed.push_back(last);
49 output.reindex[collected.front().second] = 0;
52 for (Index_ i = 1; i < len; ++i) {
53 const auto& pp = collected[i];
54 if (pp.first != last) {
56 output.collapsed.push_back(last);
59 output.reindex[pp.second] = counter;
66template<
bool oracle_,
typename Value_,
typename Index_>
67class ParallelDense final :
public DenseExtractor<oracle_, Value_, Index_> {
69 template<
class SubsetStorage_>
71 const Matrix<Value_, Index_>& matrix,
72 const SubsetStorage_& subset,
74 MaybeOracle<oracle_, Index_> oracle,
77 auto processed = format_dense_parallel_base<Index_>(subset, subset.size(), [&](
const Index_ i) -> Index_ { return i; });
78 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
81 template<
class SubsetStorage_>
83 const Matrix<Value_, Index_>& matrix,
84 const SubsetStorage_& subset,
86 MaybeOracle<oracle_, Index_> oracle,
87 const Index_ block_start,
88 const Index_ block_length,
91 auto processed = format_dense_parallel_base<Index_>(subset, block_length, [&](
const Index_ i) -> Index_ {
return i + block_start; });
92 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
95 template<
class SubsetStorage_>
97 const Matrix<Value_, Index_>& matrix,
98 const SubsetStorage_& subset,
100 MaybeOracle<oracle_, Index_> oracle,
101 VectorPtr<Index_> indices_ptr,
104 const auto& indices = *indices_ptr;
105 auto processed = format_dense_parallel_base<Index_>(subset, indices.size(), [&](
const Index_ i) -> Index_ { return indices[i]; });
106 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
111 const Matrix<Value_, Index_>& matrix,
112 DenseParallelResults<Index_> processed,
114 MaybeOracle<oracle_, Index_> oracle,
118 my_ext = new_extractor<false, oracle_>(matrix, row, std::move(oracle), std::move(processed.collapsed), opt);
119 my_reindex.swap(processed.reindex);
123 const Value_* fetch(
const Index_ i, Value_*
const buffer) {
124 const auto src = my_ext->fetch(i, my_holding_vbuffer.data());
128 for (
auto p : my_reindex) {
137 std::unique_ptr<DenseExtractor<oracle_, Value_, Index_> > my_ext;
138 std::vector<Value_> my_holding_vbuffer;
139 std::vector<Index_> my_reindex;
142template<
typename Index_>
143struct SparseParallelReindex {
150 std::vector<Index_> pool_ptrs;
151 std::vector<Index_> pool_indices;
155template<
typename Index_>
156struct SparseParallelResults {
157 std::vector<Index_> collapsed;
158 SparseParallelReindex<Index_> reindex;
161template<
typename Index_,
class SubsetStorage_,
class ToIndex_>
162SparseParallelResults<Index_> format_sparse_parallel_base(
const SubsetStorage_& indices,
const Index_ len,
const ToIndex_ to_index) {
163 std::vector<std::pair<Index_, Index_> > collected;
164 collected.reserve(len);
165 for (Index_ i = 0; i < len; ++i) {
166 const auto curdex = to_index(i);
167 collected.emplace_back(indices[curdex], curdex);
169 std::sort(collected.begin(), collected.end());
171 SparseParallelResults<Index_> output;
173 if (collected.size()) {
174 output.collapsed.reserve(len);
175 output.reindex.pool_indices.reserve(len);
176 const Index_ first = collected.front().first;
182 output.reindex.offset = first;
183 const Index_ allocation = collected.back().first - output.reindex.offset + 1;
184 output.reindex.pool_ptrs.resize(sanisizer::sum<I<
decltype(output.reindex.pool_ptrs.size())> >(allocation, 1));
187 output.reindex.pool_ptrs[counter] = 0;
189 output.reindex.pool_indices.push_back(collected.front().second);
190 output.reindex.pool_ptrs[counter] = 1;
191 output.collapsed.push_back(first);
194 for (Index_ i = 1; i < len; ++i) {
195 const auto& pp = collected[i];
196 const auto current = pp.first;
197 if (current == last) {
198 output.reindex.pool_indices.push_back(pp.second);
199 ++(output.reindex.pool_ptrs[counter]);
203 const Index_ pool_size = output.reindex.pool_indices.size();
204 counter = current - output.reindex.offset;
205 output.reindex.pool_ptrs[counter] = pool_size;
207 output.reindex.pool_indices.push_back(pp.second);
208 output.reindex.pool_ptrs[counter] = pool_size + 1;
209 output.collapsed.push_back(current);
217template<
bool oracle_,
typename Value_,
typename Index_>
218class ParallelSparse final :
public SparseExtractor<oracle_, Value_, Index_> {
220 template<
class SubsetStorage_>
222 const Matrix<Value_, Index_>& mat,
223 const SubsetStorage_& subset,
225 MaybeOracle<oracle_, Index_> oracle,
228 auto processed = format_sparse_parallel_base<Index_>(subset, subset.size(), [](
const Index_ i) -> Index_ { return i; });
229 initialize(mat, std::move(processed), subset.size(), row, std::move(oracle), opt);
232 template<
class SubsetStorage_>
234 const Matrix<Value_, Index_>& mat,
235 const SubsetStorage_& subset,
237 MaybeOracle<oracle_, Index_> oracle,
238 const Index_ block_start,
239 const Index_ block_length,
242 auto processed = format_sparse_parallel_base<Index_>(subset, block_length, [&](
const Index_ i) -> Index_ {
return i + block_start; });
243 initialize(mat, std::move(processed), block_length, row, std::move(oracle), opt);
246 template<
class SubsetStorage_>
248 const Matrix<Value_, Index_>& mat,
249 const SubsetStorage_& subset,
251 MaybeOracle<oracle_, Index_> oracle,
252 VectorPtr<Index_> indices_ptr,
255 const auto& indices = *indices_ptr;
256 auto processed = format_sparse_parallel_base<Index_>(subset, indices.size(), [&](
const Index_ i) -> Index_ { return indices[i]; });
257 initialize(mat, std::move(processed), indices.size(), row, std::move(oracle), opt);
262 const Matrix<Value_, Index_>& mat,
263 SparseParallelResults<Index_> processed,
266 MaybeOracle<oracle_, Index_> oracle,
269 const Index_ num_collapsed = processed.collapsed.size();
270 my_shift = extent - num_collapsed;
272 my_needs_value = opt.sparse_extract_value;
273 my_needs_index = opt.sparse_extract_index;
274 my_needs_sort = opt.sparse_ordered_index;
276 if (my_needs_sort && my_needs_value) {
277 my_sortspace.reserve(extent);
281 opt.sparse_extract_index =
true;
282 if (!my_needs_index) {
286 my_ext = new_extractor<true, oracle_>(mat, row, std::move(oracle), std::move(processed.collapsed), opt);
287 my_reindex = std::move(processed.reindex);
291 SparseRange<Value_, Index_> fetch(
const Index_ i, Value_*
const vbuffer, Index_*
const ibuffer) {
292 const auto vinit = (my_needs_value ? vbuffer + my_shift : NULL);
293 const auto iinit = (my_needs_index ? ibuffer + my_shift : my_holding_ibuffer.data());
294 auto input = my_ext->fetch(i, vinit, iinit);
296 if (!my_needs_sort) {
305 auto vcopy = vbuffer;
306 auto icopy = ibuffer;
308 auto vsrc = input.value;
309 bool replace_value = my_needs_value && vsrc != vcopy;
311 for (Index_ i = 0; i < input.number; ++i) {
312 const auto lookup = input.index[i] - my_reindex.offset;
313 const auto start = my_reindex.pool_ptrs[lookup];
314 const auto num = my_reindex.pool_ptrs[lookup + 1] - start;
319 std::fill_n(vcopy, num, val);
322 replace_value = (vcopy != vsrc);
325 if (my_needs_index) {
330 std::copy_n(my_reindex.pool_indices.begin() + start, num, icopy);
335 input.number = count;
336 if (my_needs_value) {
337 input.value = vbuffer;
339 if (my_needs_index) {
340 input.index = ibuffer;
345 }
else if (my_needs_value) {
349 my_sortspace.clear();
350 for (Index_ i = 0; i < input.number; ++i) {
351 const auto val = input.value[i];
352 const auto lookup = input.index[i] - my_reindex.offset;
353 const auto start = my_reindex.pool_ptrs[lookup];
354 const auto end = my_reindex.pool_ptrs[lookup + 1];
355 for (Index_ j = start; j < end; ++j) {
356 my_sortspace.emplace_back(my_reindex.pool_indices[j], val);
359 std::sort(my_sortspace.begin(), my_sortspace.end());
360 input.number = my_sortspace.size();
362 auto vcopy = vbuffer;
363 for (
const auto& ss : my_sortspace) {
367 input.value = vbuffer;
369 if (my_needs_index) {
370 auto icopy = ibuffer;
371 for (
const auto& ss : my_sortspace) {
375 input.index = ibuffer;
386 auto icopy = ibuffer;
388 for (Index_ i = 0; i < input.number; ++i) {
389 const auto lookup = input.index[i] - my_reindex.offset;
390 const auto start = my_reindex.pool_ptrs[lookup];
391 const auto num = my_reindex.pool_ptrs[lookup + 1] - start;
394 if (my_needs_index) {
395 std::copy_n(my_reindex.pool_indices.begin() + start, num, icopy);
400 input.number = count;
401 if (my_needs_index) {
402 std::sort(ibuffer, ibuffer + count);
403 input.index = ibuffer;
413 std::unique_ptr<SparseExtractor<oracle_, Value_, Index_> > my_ext;
414 bool my_needs_value, my_needs_index, my_needs_sort;
415 SparseParallelReindex<Index_> my_reindex;
416 std::vector<std::pair<Index_, Value_> > my_sortspace;
417 std::vector<Index_> my_holding_ibuffer;
437template<
typename Value_,
typename Index_,
class SubsetStorage_>
449 SubsetStorage_ subset,
452 my_matrix(std::move(matrix)),
453 my_subset(std::move(subset)),
456 sanisizer::can_cast<Index_>(my_subset.size());
460 std::shared_ptr<const Matrix<Value_, Index_> > my_matrix;
461 SubsetStorage_ my_subset;
467 return my_subset.size();
469 return my_matrix->nrow();
475 return my_matrix->ncol();
477 return my_subset.size();
482 return my_matrix->is_sparse();
486 return my_matrix->is_sparse_proportion();
490 return my_matrix->prefer_rows();
494 return my_matrix->prefer_rows_proportion();
498 return my_matrix->uses_oracle(row);
509 template<
typename ... Args_>
510 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > populate_myopic_dense(
514 if (row == my_by_row) {
515 return std::make_unique<subset_utils::MyopicPerpendicularDense<Value_, Index_, SubsetStorage_> >(
519 std::forward<Args_>(args)...
522 return std::make_unique<DelayedSubset_internal::ParallelDense<false, Value_, Index_> >(
527 std::forward<Args_>(args)...
533 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> >
dense(
537 return populate_myopic_dense(row, opt);
540 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> >
dense(
542 const Index_ block_start,
543 const Index_ block_length,
546 return populate_myopic_dense(row, block_start, block_length, opt);
549 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> >
dense(
554 return populate_myopic_dense(row, std::move(my_subset_ptr), opt);
561 template<
typename ... Args_>
562 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > populate_myopic_sparse(
566 if (row == my_by_row) {
567 return std::make_unique<subset_utils::MyopicPerpendicularSparse<Value_, Index_, SubsetStorage_> >(
571 std::forward<Args_>(args)...
574 return std::make_unique<DelayedSubset_internal::ParallelSparse<false, Value_, Index_> >(
579 std::forward<Args_>(args)...
585 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> >
sparse(
589 return populate_myopic_sparse(row, opt);
592 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> >
sparse(
594 const Index_ block_start,
595 const Index_ block_length,
598 return populate_myopic_sparse(row, block_start, block_length, opt);
601 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> >
sparse(
606 return populate_myopic_sparse(row, std::move(my_subset_ptr), opt);
613 template<
typename ... Args_>
614 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > populate_oracular_dense(
619 if (row == my_by_row) {
620 return std::make_unique<subset_utils::OracularPerpendicularDense<Value_, Index_> >(
625 std::forward<Args_>(args)...
628 return std::make_unique<DelayedSubset_internal::ParallelDense<true, Value_, Index_> >(
633 std::forward<Args_>(args)...
639 std::unique_ptr<OracularDenseExtractor<Value_, Index_> >
dense(
644 return populate_oracular_dense(row, std::move(oracle), opt);
647 std::unique_ptr<OracularDenseExtractor<Value_, Index_> >
dense(
650 const Index_ block_start,
651 const Index_ block_length,
654 return populate_oracular_dense(row, std::move(oracle), block_start, block_length, opt);
657 std::unique_ptr<OracularDenseExtractor<Value_, Index_> >
dense(
663 return populate_oracular_dense(row, std::move(oracle), std::move(my_subset_ptr), opt);
670 template<
typename ... Args_>
671 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > populate_oracular_sparse(
676 if (row == my_by_row) {
677 return std::make_unique<subset_utils::OracularPerpendicularSparse<Value_, Index_> >(
682 std::forward<Args_>(args)...
685 return std::make_unique<DelayedSubset_internal::ParallelSparse<true, Value_, Index_> >(
690 std::forward<Args_>(args)...
696 std::unique_ptr<OracularSparseExtractor<Value_, Index_> >
sparse(
701 return populate_oracular_sparse(row, std::move(oracle), opt);
704 std::unique_ptr<OracularSparseExtractor<Value_, Index_> >
sparse(
707 const Index_ block_start,
708 const Index_ block_length,
711 return populate_oracular_sparse(row, std::move(oracle), block_start, block_length, opt);
714 std::unique_ptr<OracularSparseExtractor<Value_, Index_> >
sparse(
720 return populate_oracular_sparse(row, std::move(oracle), std::move(my_subset_ptr), opt);
Delayed subsetting of a matrix with general indices.
Definition DelayedSubset.hpp:438
bool uses_oracle(const bool row) const
Definition DelayedSubset.hpp:497
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(const bool row, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:601
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(const bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubset.hpp:639
Index_ ncol() const
Definition DelayedSubset.hpp:473
bool prefer_rows() const
Definition DelayedSubset.hpp:489
DelayedSubset(std::shared_ptr< const Matrix< Value_, Index_ > > matrix, SubsetStorage_ subset, const bool by_row)
Definition DelayedSubset.hpp:447
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(const bool row, const Options &opt) const
Definition DelayedSubset.hpp:533
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(const bool row, const Index_ block_start, const Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:592
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(const bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubset.hpp:696
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(const bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:657
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(const bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Index_ block_start, const Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:704
double prefer_rows_proportion() const
Definition DelayedSubset.hpp:493
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(const bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Index_ block_start, const Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:647
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(const bool row, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:549
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(const bool row, const Options &opt) const
Definition DelayedSubset.hpp:585
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(const bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > my_subset_ptr, const Options &opt) const
Definition DelayedSubset.hpp:714
Index_ nrow() const
Definition DelayedSubset.hpp:465
bool is_sparse() const
Definition DelayedSubset.hpp:481
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(const bool row, const Index_ block_start, const Index_ block_length, const Options &opt) const
Definition DelayedSubset.hpp:540
double is_sparse_proportion() const
Definition DelayedSubset.hpp:485
Virtual class for a matrix.
Definition Matrix.hpp:59
Predict future access requests on the target dimension.
Definition Oracle.hpp:29
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::shared_ptr< const std::vector< Index_ > > VectorPtr
Definition Matrix.hpp:26
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Definition Index_to_container.hpp:92
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
Definition Extractor.hpp:273
Options for accessing data from a Matrix instance.
Definition Options.hpp:30