tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
DelayedSubsetUnique.hpp
Go to the documentation of this file.
1#ifndef TATAMI_DELAYED_SUBSET_UNIQUE_HPP
2#define TATAMI_DELAYED_SUBSET_UNIQUE_HPP
3
4#include "utils.hpp"
5#include "../base/Matrix.hpp"
6#include "../utils/copy.hpp"
7
8#include <algorithm>
9#include <numeric>
10#include <memory>
11
18namespace tatami {
19
24
25template<typename Index_>
27 std::vector<Index_> sorted;
28 std::vector<Index_> permutation;
29};
30
31template<typename Index_, class SubsetStorage_, class ToIndex_>
33 std::vector<std::pair<Index_, Index_> > collected;
34 collected.reserve(len);
35 for (Index_ i = 0; i < len; ++i) {
36 collected.emplace_back(subset[to_index(i)], i);
37 }
38 std::sort(collected.begin(), collected.end());
39
41 output.sorted.reserve(len);
42 output.permutation.reserve(len);
43 for (const auto& pp : collected) {
44 output.sorted.push_back(pp.first);
45 output.permutation.push_back(pp.second);
46 }
47
48 return output;
49}
50
51template<bool oracle_, typename Value_, typename Index_>
52class ParallelDense : public DenseExtractor<oracle_, Value_, Index_> {
53public:
54 template<class SubsetStorage_>
55 ParallelDense(const Matrix<Value_, Index_>* matrix, const SubsetStorage_& subset, bool row, MaybeOracle<oracle_, Index_> oracle, const Options& opt) {
56 auto processed = format_dense_parallel<Index_>(subset, subset.size(), [&](Index_ i) -> Index_ { return i; });
57 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
58 }
59
60 template<class SubsetStorage_>
61 ParallelDense(const Matrix<Value_, Index_>* matrix, const SubsetStorage_& subset, bool row, MaybeOracle<oracle_, Index_> oracle, Index_ block_start, Index_ block_length, const Options& opt) {
62 auto processed = format_dense_parallel<Index_>(subset, block_length, [&](Index_ i) -> Index_ { return i + block_start; });
63 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
64 }
65
66 template<class SubsetStorage_>
67 ParallelDense(const Matrix<Value_, Index_>* matrix, const SubsetStorage_& subset, bool row, MaybeOracle<oracle_, Index_> oracle, VectorPtr<Index_> indices_ptr, const Options& opt) {
68 const auto& indices = *indices_ptr;
69 auto processed = format_dense_parallel<Index_>(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; });
70 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
71 }
72
73private:
74 void initialize(const Matrix<Value_, Index_>* matrix, DenseParallelResults<Index_> processed, bool row, MaybeOracle<oracle_, Index_> oracle, const Options& opt) {
75 size_t extent = processed.sorted.size();
76 my_holding_vbuffer.resize(extent);
77 my_ext = new_extractor<false, oracle_>(matrix, row, std::move(oracle), std::move(processed.sorted), opt);
78 my_permutation = std::move(processed.permutation);
79 }
80
81public:
82 const Value_* fetch(Index_ i, Value_* buffer) {
83 auto src = my_ext->fetch(i, my_holding_vbuffer.data());
84
85 // 'input' and 'output' should not point to the same array. In theory, it
86 // is possible to do an in-place permutation, but this requires another
87 // array anyway to track the permutation status, so we'll just keep it simple.
88 for (auto p : my_permutation) {
89 buffer[p] = *src;
90 ++src;
91 }
92
93 return buffer;
94 }
95
96private:
97 std::unique_ptr<DenseExtractor<oracle_, Value_, Index_> > my_ext;
98 std::vector<Value_> my_holding_vbuffer;
99 std::vector<Index_> my_permutation;
100};
101
102template<typename Index_, class SubsetStorage_, class ToIndex_>
104 std::vector<Index_> collected;
105 collected.reserve(len);
106 for (Index_ i = 0; i < len; ++i) {
107 collected.emplace_back(subset[to_index(i)]);
108 }
109 std::sort(collected.begin(), collected.end());
110 return collected;
111}
112
113template<bool oracle_, typename Value_, typename Index_>
114class ParallelSparse : public SparseExtractor<oracle_, Value_, Index_> {
115public:
116 template<class SubsetStorage_>
117 ParallelSparse(
118 const Matrix<Value_, Index_>* matrix,
119 const SubsetStorage_& subset,
120 const std::vector<Index_>& remap,
121 bool row, MaybeOracle<oracle_, Index_> oracle,
122 const Options& opt
123 ) :
124 my_remapping(remap)
125 {
126 auto processed = format_sparse_parallel<Index_>(subset, subset.size(), [](Index_ i) -> Index_ { return i; });
127 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
128 }
129
130 template<class SubsetStorage_>
131 ParallelSparse(
132 const Matrix<Value_, Index_>* matrix,
133 const SubsetStorage_& subset,
134 const std::vector<Index_>& remap,
135 bool row,
136 MaybeOracle<oracle_, Index_> oracle,
137 Index_ block_start,
138 Index_ block_length,
139 const Options& opt
140 ) :
141 my_remapping(remap)
142 {
143 auto processed = format_sparse_parallel<Index_>(subset, block_length, [&](Index_ i) -> Index_ { return i + block_start; });
144 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
145 }
146
147 template<class SubsetStorage_>
148 ParallelSparse(
149 const Matrix<Value_, Index_>* matrix,
150 const SubsetStorage_& subset,
151 const std::vector<Index_>& remap,
152 bool row,
153 MaybeOracle<oracle_, Index_> oracle,
154 VectorPtr<Index_> indices_ptr,
155 const Options& opt
156 ) :
157 my_remapping(remap)
158 {
159 const auto& indices = *indices_ptr;
160 auto processed = format_sparse_parallel<Index_>(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; });
161 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
162 }
163
164private:
165 void initialize(const Matrix<Value_, Index_>* matrix, std::vector<Index_> sorted, bool row, MaybeOracle<oracle_, Index_> oracle, Options opt) {
166 my_needs_value = opt.sparse_extract_value;
167 my_needs_index = opt.sparse_extract_index;
168 my_needs_sort = opt.sparse_ordered_index;
169
170 // The conditionals here mirror those in 'fetch',
171 // to self-document the case where each of the temporaries are needed.
172 if (!my_needs_sort) {
173 if (my_needs_index) {
174 ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available.
175 }
176
177 } else if (my_needs_value) {
178 opt.sparse_extract_index = true;
179 my_sortspace.reserve(sorted.size());
180 if (my_needs_index) {
181 ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available.
182 } else {
183 my_holding_ibuffer.resize(sorted.size()); // needs 'my_holding_ibuffer' as user-provided 'index_buffer' may be NULL.
184 }
185
186 } else if (my_needs_index) {
187 ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available.
188 }
189
190 my_ext = new_extractor<true, oracle_>(matrix, row, std::move(oracle), std::move(sorted), opt);
191 }
192
193public:
194 SparseRange<Value_, Index_> fetch(Index_ i, Value_* value_buffer, Index_* index_buffer) {
195 auto input = my_ext->fetch(i, value_buffer, (my_holding_ibuffer.empty() ? index_buffer : my_holding_ibuffer.data()));
196
197 // Pointers in 'input' and the 'buffer' pointers may point to the same array,
198 // as we're either just modifiying in place or we're copying to 'my_sortspace'.
199 if (!my_needs_sort) {
200 if (my_needs_index) {
201 for (Index_ i = 0; i < input.number; ++i) {
202 index_buffer[i] = my_remapping[input.index[i]];
203 }
204 input.index = index_buffer;
205 }
206
207 } else if (my_needs_value) {
208 // We assume that the subset have already been extracted for sorting
209 // purposes, even if they weren't actually requested.
210 my_sortspace.clear();
211 for (Index_ i = 0; i < input.number; ++i) {
212 my_sortspace.emplace_back(my_remapping[input.index[i]], input.value[i]);
213 }
214 std::sort(my_sortspace.begin(), my_sortspace.end());
215
216 auto vcopy = value_buffer;
217 for (const auto& ss : my_sortspace) {
218 *vcopy = ss.second;
219 ++vcopy;
220 }
221 input.value = value_buffer;
222
223 if (my_needs_index) {
224 auto icopy = index_buffer;
225 for (const auto& ss : my_sortspace) {
226 *icopy = ss.first;
227 ++icopy;
228 }
229 input.index = index_buffer;
230 } else {
231 input.index = NULL;
232 }
233
234 } else if (my_needs_index) {
235 for (Index_ i = 0; i < input.number; ++i) {
236 index_buffer[i] = my_remapping[input.index[i]];
237 }
238 std::sort(index_buffer, index_buffer + input.number);
239 input.index = index_buffer;
240 }
241
242 return input;
243 }
244
245private:
246 const std::vector<Index_>& my_remapping;
247 std::unique_ptr<SparseExtractor<oracle_, Value_, Index_> > my_ext;
248 bool my_needs_value, my_needs_index, my_needs_sort;
249 std::vector<std::pair<Index_, Value_> > my_sortspace;
250 std::vector<Index_> my_holding_ibuffer;
251};
252
253}
269template<typename Value_, typename Index_, class SubsetStorage_>
270class DelayedSubsetUnique : public Matrix<Value_, Index_> {
271public:
280 DelayedSubsetUnique(std::shared_ptr<const Matrix<Value_, Index_> > matrix, SubsetStorage_ subset, bool by_row, bool check = true) :
281 my_matrix(std::move(matrix)), my_subset(std::move(subset)), my_by_row(by_row)
282 {
283 Index_ fulldim = my_by_row ? my_matrix->nrow() : my_matrix->ncol();
284
285 if (check) {
286 std::vector<unsigned char> checks(fulldim);
287 for (Index_ i = 0, end = my_subset.size(); i < end; ++i) {
288 auto& found = checks[my_subset[i]];
289 if (found) {
290 throw std::runtime_error("my_subset should be unique");
291 }
292 found = 1;
293 }
294 }
295
296 my_mapping_single.resize(fulldim);
297 for (Index_ i = 0, end = my_subset.size(); i < end; ++i) {
298 my_mapping_single[my_subset[i]] = i;
299 }
300 }
301
302private:
303 std::shared_ptr<const Matrix<Value_, Index_> > my_matrix;
304 SubsetStorage_ my_subset;
305 bool my_by_row;
306 std::vector<Index_> my_mapping_single;
307
308public:
309 Index_ nrow() const {
310 if (my_by_row) {
311 return my_subset.size();
312 } else {
313 return my_matrix->nrow();
314 }
315 }
316
317 Index_ ncol() const {
318 if (my_by_row) {
319 return my_matrix->ncol();
320 } else {
321 return my_subset.size();
322 }
323 }
324
325 bool is_sparse() const {
326 return my_matrix->is_sparse();
327 }
328
329 double is_sparse_proportion() const {
330 return my_matrix->is_sparse_proportion();
331 }
332
333 bool prefer_rows() const {
334 return my_matrix->prefer_rows();
335 }
336
337 double prefer_rows_proportion() const {
338 return my_matrix->prefer_rows_proportion();
339 }
340
341 bool uses_oracle(bool row) const {
342 return my_matrix->uses_oracle(row);
343 }
344
346
348
350
352
353 /********************
354 *** Myopic dense ***
355 ********************/
356private:
357 template<typename ... Args_>
358 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > populate_myopic_dense(bool row, Args_&& ... args) const {
359 if (row == my_by_row) {
360 return std::make_unique<subset_utils::MyopicPerpendicularDense<Value_, Index_, SubsetStorage_> >(my_matrix.get(), my_subset, row, std::forward<Args_>(args)...);
361 } else {
362 return std::make_unique<DelayedSubsetUnique_internal::ParallelDense<false, Value_, Index_> >(my_matrix.get(), my_subset, row, false, std::forward<Args_>(args)...);
363 }
364 }
365
366public:
367 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > dense(bool row, const Options& opt) const {
368 return populate_myopic_dense(row, opt);
369 }
370
371 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > dense(bool row, Index_ block_start, Index_ block_length, const Options& opt) const {
372 return populate_myopic_dense(row, block_start, block_length, opt);
373 }
374
375 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > dense(bool row, VectorPtr<Index_> indices_ptr, const Options& opt) const {
376 return populate_myopic_dense(row, std::move(indices_ptr), opt);
377 }
378
379 /*********************
380 *** Myopic sparse ***
381 *********************/
382private:
383 template<typename ... Args_>
384 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > populate_myopic_sparse(bool row, Args_&& ... args) const {
385 if (row == my_by_row) {
386 return std::make_unique<subset_utils::MyopicPerpendicularSparse<Value_, Index_, SubsetStorage_> >(my_matrix.get(), my_subset, row, std::forward<Args_>(args)...);
387 } else {
388 return std::make_unique<DelayedSubsetUnique_internal::ParallelSparse<false, Value_, Index_> >(my_matrix.get(), my_subset, my_mapping_single, row, false, std::forward<Args_>(args)...);
389 }
390 }
391
392public:
393 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > sparse(bool row, const Options& opt) const {
394 return populate_myopic_sparse(row, opt);
395 }
396
397 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > sparse(bool row, Index_ block_start, Index_ block_length, const Options& opt) const {
398 return populate_myopic_sparse(row, block_start, block_length, opt);
399 }
400
401 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > sparse(bool row, VectorPtr<Index_> indices_ptr, const Options& opt) const {
402 return populate_myopic_sparse(row, std::move(indices_ptr), opt);
403 }
404
405 /**********************
406 *** Oracular dense ***
407 **********************/
408private:
409 template<typename ... Args_>
410 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > populate_oracular_dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Args_&& ... args) const {
411 if (row == my_by_row) {
412 return std::make_unique<subset_utils::OracularPerpendicularDense<Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
413 } else {
414 return std::make_unique<DelayedSubsetUnique_internal::ParallelDense<true, Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
415 }
416 }
417
418public:
419 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, const Options& opt) const {
420 return populate_oracular_dense(row, std::move(oracle), opt);
421 }
422
423 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Index_ block_start, Index_ block_length, const Options& opt) const {
424 return populate_oracular_dense(row, std::move(oracle), block_start, block_length, opt);
425 }
426
427 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, VectorPtr<Index_> indices_ptr, const Options& opt) const {
428 return populate_oracular_dense(row, std::move(oracle), std::move(indices_ptr), opt);
429 }
430
431 /***********************
432 *** Oracular sparse ***
433 ***********************/
434private:
435 template<typename ... Args_>
436 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > populate_oracular_sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Args_&& ... args) const {
437 if (row == my_by_row) {
438 return std::make_unique<subset_utils::OracularPerpendicularSparse<Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
439 } else {
440 return std::make_unique<DelayedSubsetUnique_internal::ParallelSparse<true, Value_, Index_> >(my_matrix.get(), my_subset, my_mapping_single, row, std::move(oracle), std::forward<Args_>(args)...);
441 }
442 }
443
444public:
445 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, const Options& opt) const {
446 return populate_oracular_sparse(row, std::move(oracle), opt);
447 }
448
449 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Index_ block_start, Index_ block_length, const Options& opt) const {
450 return populate_oracular_sparse(row, std::move(oracle), block_start, block_length, opt);
451 }
452
453 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, VectorPtr<Index_> indices_ptr, const Options& opt) const {
454 return populate_oracular_sparse(row, std::move(oracle), std::move(indices_ptr), opt);
455 }
456};
457
458}
459
460#endif
Delayed subsetting of a matrix with unique indices.
Definition DelayedSubsetUnique.hpp:270
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubsetUnique.hpp:445
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, const Options &opt) const
Definition DelayedSubsetUnique.hpp:367
bool prefer_rows() const
Definition DelayedSubsetUnique.hpp:333
Index_ ncol() const
Definition DelayedSubsetUnique.hpp:317
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:375
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:449
double is_sparse_proportion() const
Definition DelayedSubsetUnique.hpp:329
bool is_sparse() const
Definition DelayedSubsetUnique.hpp:325
double prefer_rows_proportion() const
Definition DelayedSubsetUnique.hpp:337
DelayedSubsetUnique(std::shared_ptr< const Matrix< Value_, Index_ > > matrix, SubsetStorage_ subset, bool by_row, bool check=true)
Definition DelayedSubsetUnique.hpp:280
bool uses_oracle(bool row) const
Definition DelayedSubsetUnique.hpp:341
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:401
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:397
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const
Definition DelayedSubsetUnique.hpp:393
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:371
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:453
Index_ nrow() const
Definition DelayedSubsetUnique.hpp:309
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubsetUnique.hpp:419
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:423
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:427
Virtual class for a matrix.
Definition Matrix.hpp:59
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense_row() const
Definition Matrix.hpp:287
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense_column() const
Definition Matrix.hpp:328
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse_column() const
Definition Matrix.hpp:537
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse_row() const
Definition Matrix.hpp:496
Predict future access requests on the target dimension.
Definition Oracle.hpp:21
Flexible representations for matrix data.
Definition Extractor.hpp:15
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
Definition Extractor.hpp:273
typename std::conditional< oracle_, OracularSparseExtractor< Value_, Index_ >, MyopicSparseExtractor< Value_, Index_ > >::type SparseExtractor
Definition Extractor.hpp:284
std::shared_ptr< const std::vector< Index_ > > VectorPtr
Definition Matrix.hpp:26
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35
Options for accessing data from a Matrix instance.
Definition Options.hpp:30