tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
DelayedSubsetUnique.hpp
Go to the documentation of this file.
1#ifndef TATAMI_DELAYED_SUBSET_UNIQUE_HPP
2#define TATAMI_DELAYED_SUBSET_UNIQUE_HPP
3
4#include "utils.hpp"
5#include "../base/Matrix.hpp"
6#include "../utils/copy.hpp"
7
8#include <algorithm>
9#include <numeric>
10#include <memory>
11
18namespace tatami {
19
23namespace DelayedSubsetUnique_internal {
24
25template<typename Index_>
26struct DenseParallelResults {
27 std::vector<Index_> sorted;
28 std::vector<Index_> permutation;
29};
30
31template<typename Index_, class SubsetStorage_, class ToIndex_>
32DenseParallelResults<Index_> format_dense_parallel(const SubsetStorage_& subset, Index_ len, ToIndex_ to_index) {
33 std::vector<std::pair<Index_, Index_> > collected;
34 collected.reserve(len);
35 for (Index_ i = 0; i < len; ++i) {
36 collected.emplace_back(subset[to_index(i)], i);
37 }
38 std::sort(collected.begin(), collected.end());
39
40 DenseParallelResults<Index_> output;
41 output.sorted.reserve(len);
42 output.permutation.reserve(len);
43 for (const auto& pp : collected) {
44 output.sorted.push_back(pp.first);
45 output.permutation.push_back(pp.second);
46 }
47
48 return output;
49}
50
51template<bool oracle_, typename Value_, typename Index_>
52class ParallelDense final : public DenseExtractor<oracle_, Value_, Index_> {
53public:
54 template<class SubsetStorage_>
55 ParallelDense(const Matrix<Value_, Index_>* matrix, const SubsetStorage_& subset, bool row, MaybeOracle<oracle_, Index_> oracle, const Options& opt) {
56 auto processed = format_dense_parallel<Index_>(subset, subset.size(), [&](Index_ i) -> Index_ { return i; });
57 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
58 }
59
60 template<class SubsetStorage_>
61 ParallelDense(const Matrix<Value_, Index_>* matrix, const SubsetStorage_& subset, bool row, MaybeOracle<oracle_, Index_> oracle, Index_ block_start, Index_ block_length, const Options& opt) {
62 auto processed = format_dense_parallel<Index_>(subset, block_length, [&](Index_ i) -> Index_ { return i + block_start; });
63 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
64 }
65
66 template<class SubsetStorage_>
67 ParallelDense(const Matrix<Value_, Index_>* matrix, const SubsetStorage_& subset, bool row, MaybeOracle<oracle_, Index_> oracle, VectorPtr<Index_> indices_ptr, const Options& opt) {
68 const auto& indices = *indices_ptr;
69 auto processed = format_dense_parallel<Index_>(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; });
70 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
71 }
72
73private:
74 void initialize(const Matrix<Value_, Index_>* matrix, DenseParallelResults<Index_> processed, bool row, MaybeOracle<oracle_, Index_> oracle, const Options& opt) {
75 my_holding_vbuffer.resize(processed.sorted.size());
76 my_ext = new_extractor<false, oracle_>(matrix, row, std::move(oracle), std::move(processed.sorted), opt);
77 my_permutation = std::move(processed.permutation);
78 }
79
80public:
81 const Value_* fetch(Index_ i, Value_* buffer) {
82 auto src = my_ext->fetch(i, my_holding_vbuffer.data());
83
84 // 'input' and 'output' should not point to the same array. In theory, it
85 // is possible to do an in-place permutation, but this requires another
86 // array anyway to track the permutation status, so we'll just keep it simple.
87 for (auto p : my_permutation) {
88 buffer[p] = *src;
89 ++src;
90 }
91
92 return buffer;
93 }
94
95private:
96 std::unique_ptr<DenseExtractor<oracle_, Value_, Index_> > my_ext;
97 std::vector<Value_> my_holding_vbuffer;
98 std::vector<Index_> my_permutation;
99};
100
101template<typename Index_, class SubsetStorage_, class ToIndex_>
102std::vector<Index_> format_sparse_parallel(const SubsetStorage_& subset, Index_ len, ToIndex_ to_index) {
103 std::vector<Index_> collected;
104 collected.reserve(len);
105 for (Index_ i = 0; i < len; ++i) {
106 collected.emplace_back(subset[to_index(i)]);
107 }
108 std::sort(collected.begin(), collected.end());
109 return collected;
110}
111
112template<bool oracle_, typename Value_, typename Index_>
113class ParallelSparse final : public SparseExtractor<oracle_, Value_, Index_> {
114public:
115 template<class SubsetStorage_>
116 ParallelSparse(
117 const Matrix<Value_, Index_>* matrix,
118 const SubsetStorage_& subset,
119 const std::vector<Index_>& remap,
120 bool row, MaybeOracle<oracle_, Index_> oracle,
121 const Options& opt
122 ) :
123 my_remapping(remap)
124 {
125 auto processed = format_sparse_parallel<Index_>(subset, subset.size(), [](Index_ i) -> Index_ { return i; });
126 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
127 }
128
129 template<class SubsetStorage_>
130 ParallelSparse(
131 const Matrix<Value_, Index_>* matrix,
132 const SubsetStorage_& subset,
133 const std::vector<Index_>& remap,
134 bool row,
135 MaybeOracle<oracle_, Index_> oracle,
136 Index_ block_start,
137 Index_ block_length,
138 const Options& opt
139 ) :
140 my_remapping(remap)
141 {
142 auto processed = format_sparse_parallel<Index_>(subset, block_length, [&](Index_ i) -> Index_ { return i + block_start; });
143 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
144 }
145
146 template<class SubsetStorage_>
147 ParallelSparse(
148 const Matrix<Value_, Index_>* matrix,
149 const SubsetStorage_& subset,
150 const std::vector<Index_>& remap,
151 bool row,
152 MaybeOracle<oracle_, Index_> oracle,
153 VectorPtr<Index_> indices_ptr,
154 const Options& opt
155 ) :
156 my_remapping(remap)
157 {
158 const auto& indices = *indices_ptr;
159 auto processed = format_sparse_parallel<Index_>(subset, indices.size(), [&](Index_ i) -> Index_ { return indices[i]; });
160 initialize(matrix, std::move(processed), row, std::move(oracle), opt);
161 }
162
163private:
164 void initialize(const Matrix<Value_, Index_>* matrix, std::vector<Index_> sorted, bool row, MaybeOracle<oracle_, Index_> oracle, Options opt) {
165 my_needs_value = opt.sparse_extract_value;
166 my_needs_index = opt.sparse_extract_index;
167 my_needs_sort = opt.sparse_ordered_index;
168
169 // The conditionals here mirror those in 'fetch',
170 // to self-document the case where each of the temporaries are needed.
171 if (!my_needs_sort) {
172 if (my_needs_index) {
173 ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available.
174 }
175
176 } else if (my_needs_value) {
177 opt.sparse_extract_index = true;
178 my_sortspace.reserve(sorted.size());
179 if (my_needs_index) {
180 ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available.
181 } else {
182 my_holding_ibuffer.resize(sorted.size()); // needs 'my_holding_ibuffer' as user-provided 'index_buffer' may be NULL.
183 }
184
185 } else if (my_needs_index) {
186 ; // no 'my_holding_ibuffer' required as a user-provided 'index_buffer' should be available.
187 }
188
189 my_ext = new_extractor<true, oracle_>(matrix, row, std::move(oracle), std::move(sorted), opt);
190 }
191
192public:
193 SparseRange<Value_, Index_> fetch(Index_ i, Value_* value_buffer, Index_* index_buffer) {
194 auto input = my_ext->fetch(i, value_buffer, (my_holding_ibuffer.empty() ? index_buffer : my_holding_ibuffer.data()));
195
196 // Pointers in 'input' and the 'buffer' pointers may point to the same array,
197 // as we're either just modifiying in place or we're copying to 'my_sortspace'.
198 if (!my_needs_sort) {
199 if (my_needs_index) {
200 for (Index_ i = 0; i < input.number; ++i) {
201 index_buffer[i] = my_remapping[input.index[i]];
202 }
203 input.index = index_buffer;
204 }
205
206 } else if (my_needs_value) {
207 // We assume that the subset have already been extracted for sorting
208 // purposes, even if they weren't actually requested.
209 my_sortspace.clear();
210 for (Index_ i = 0; i < input.number; ++i) {
211 my_sortspace.emplace_back(my_remapping[input.index[i]], input.value[i]);
212 }
213 std::sort(my_sortspace.begin(), my_sortspace.end());
214
215 auto vcopy = value_buffer;
216 for (const auto& ss : my_sortspace) {
217 *vcopy = ss.second;
218 ++vcopy;
219 }
220 input.value = value_buffer;
221
222 if (my_needs_index) {
223 auto icopy = index_buffer;
224 for (const auto& ss : my_sortspace) {
225 *icopy = ss.first;
226 ++icopy;
227 }
228 input.index = index_buffer;
229 } else {
230 input.index = NULL;
231 }
232
233 } else if (my_needs_index) {
234 for (Index_ i = 0; i < input.number; ++i) {
235 index_buffer[i] = my_remapping[input.index[i]];
236 }
237 std::sort(index_buffer, index_buffer + input.number);
238 input.index = index_buffer;
239 }
240
241 return input;
242 }
243
244private:
245 const std::vector<Index_>& my_remapping;
246 std::unique_ptr<SparseExtractor<oracle_, Value_, Index_> > my_ext;
247 bool my_needs_value, my_needs_index, my_needs_sort;
248 std::vector<std::pair<Index_, Value_> > my_sortspace;
249 std::vector<Index_> my_holding_ibuffer;
250};
251
252}
268template<typename Value_, typename Index_, class SubsetStorage_>
269class DelayedSubsetUnique final : public Matrix<Value_, Index_> {
270public:
279 DelayedSubsetUnique(std::shared_ptr<const Matrix<Value_, Index_> > matrix, SubsetStorage_ subset, bool by_row, bool check = true) :
280 my_matrix(std::move(matrix)), my_subset(std::move(subset)), my_by_row(by_row)
281 {
282 Index_ fulldim = my_by_row ? my_matrix->nrow() : my_matrix->ncol();
283
284 if (check) {
285 std::vector<unsigned char> checks(fulldim);
286 for (Index_ i = 0, end = my_subset.size(); i < end; ++i) {
287 auto& found = checks[my_subset[i]];
288 if (found) {
289 throw std::runtime_error("my_subset should be unique");
290 }
291 found = 1;
292 }
293 }
294
295 my_mapping_single.resize(fulldim);
296 for (Index_ i = 0, end = my_subset.size(); i < end; ++i) {
297 my_mapping_single[my_subset[i]] = i;
298 }
299 }
300
301private:
302 std::shared_ptr<const Matrix<Value_, Index_> > my_matrix;
303 SubsetStorage_ my_subset;
304 bool my_by_row;
305 std::vector<Index_> my_mapping_single;
306
307public:
308 Index_ nrow() const {
309 if (my_by_row) {
310 return my_subset.size();
311 } else {
312 return my_matrix->nrow();
313 }
314 }
315
316 Index_ ncol() const {
317 if (my_by_row) {
318 return my_matrix->ncol();
319 } else {
320 return my_subset.size();
321 }
322 }
323
324 bool is_sparse() const {
325 return my_matrix->is_sparse();
326 }
327
328 double is_sparse_proportion() const {
329 return my_matrix->is_sparse_proportion();
330 }
331
332 bool prefer_rows() const {
333 return my_matrix->prefer_rows();
334 }
335
336 double prefer_rows_proportion() const {
337 return my_matrix->prefer_rows_proportion();
338 }
339
340 bool uses_oracle(bool row) const {
341 return my_matrix->uses_oracle(row);
342 }
343
344 using Matrix<Value_, Index_>::dense_column;
345
346 using Matrix<Value_, Index_>::dense_row;
347
348 using Matrix<Value_, Index_>::sparse_column;
349
350 using Matrix<Value_, Index_>::sparse_row;
351
352 /********************
353 *** Myopic dense ***
354 ********************/
355private:
356 template<typename ... Args_>
357 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > populate_myopic_dense(bool row, Args_&& ... args) const {
358 if (row == my_by_row) {
359 return std::make_unique<subset_utils::MyopicPerpendicularDense<Value_, Index_, SubsetStorage_> >(my_matrix.get(), my_subset, row, std::forward<Args_>(args)...);
360 } else {
361 return std::make_unique<DelayedSubsetUnique_internal::ParallelDense<false, Value_, Index_> >(my_matrix.get(), my_subset, row, false, std::forward<Args_>(args)...);
362 }
363 }
364
365public:
366 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > dense(bool row, const Options& opt) const {
367 return populate_myopic_dense(row, opt);
368 }
369
370 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > dense(bool row, Index_ block_start, Index_ block_length, const Options& opt) const {
371 return populate_myopic_dense(row, block_start, block_length, opt);
372 }
373
374 std::unique_ptr<MyopicDenseExtractor<Value_, Index_> > dense(bool row, VectorPtr<Index_> indices_ptr, const Options& opt) const {
375 return populate_myopic_dense(row, std::move(indices_ptr), opt);
376 }
377
378 /*********************
379 *** Myopic sparse ***
380 *********************/
381private:
382 template<typename ... Args_>
383 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > populate_myopic_sparse(bool row, Args_&& ... args) const {
384 if (row == my_by_row) {
385 return std::make_unique<subset_utils::MyopicPerpendicularSparse<Value_, Index_, SubsetStorage_> >(my_matrix.get(), my_subset, row, std::forward<Args_>(args)...);
386 } else {
387 return std::make_unique<DelayedSubsetUnique_internal::ParallelSparse<false, Value_, Index_> >(my_matrix.get(), my_subset, my_mapping_single, row, false, std::forward<Args_>(args)...);
388 }
389 }
390
391public:
392 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > sparse(bool row, const Options& opt) const {
393 return populate_myopic_sparse(row, opt);
394 }
395
396 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > sparse(bool row, Index_ block_start, Index_ block_length, const Options& opt) const {
397 return populate_myopic_sparse(row, block_start, block_length, opt);
398 }
399
400 std::unique_ptr<MyopicSparseExtractor<Value_, Index_> > sparse(bool row, VectorPtr<Index_> indices_ptr, const Options& opt) const {
401 return populate_myopic_sparse(row, std::move(indices_ptr), opt);
402 }
403
404 /**********************
405 *** Oracular dense ***
406 **********************/
407private:
408 template<typename ... Args_>
409 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > populate_oracular_dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Args_&& ... args) const {
410 if (row == my_by_row) {
411 return std::make_unique<subset_utils::OracularPerpendicularDense<Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
412 } else {
413 return std::make_unique<DelayedSubsetUnique_internal::ParallelDense<true, Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
414 }
415 }
416
417public:
418 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, const Options& opt) const {
419 return populate_oracular_dense(row, std::move(oracle), opt);
420 }
421
422 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Index_ block_start, Index_ block_length, const Options& opt) const {
423 return populate_oracular_dense(row, std::move(oracle), block_start, block_length, opt);
424 }
425
426 std::unique_ptr<OracularDenseExtractor<Value_, Index_> > dense(bool row, std::shared_ptr<const Oracle<Index_> > oracle, VectorPtr<Index_> indices_ptr, const Options& opt) const {
427 return populate_oracular_dense(row, std::move(oracle), std::move(indices_ptr), opt);
428 }
429
430 /***********************
431 *** Oracular sparse ***
432 ***********************/
433private:
434 template<typename ... Args_>
435 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > populate_oracular_sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Args_&& ... args) const {
436 if (row == my_by_row) {
437 return std::make_unique<subset_utils::OracularPerpendicularSparse<Value_, Index_> >(my_matrix.get(), my_subset, row, std::move(oracle), std::forward<Args_>(args)...);
438 } else {
439 return std::make_unique<DelayedSubsetUnique_internal::ParallelSparse<true, Value_, Index_> >(my_matrix.get(), my_subset, my_mapping_single, row, std::move(oracle), std::forward<Args_>(args)...);
440 }
441 }
442
443public:
444 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, const Options& opt) const {
445 return populate_oracular_sparse(row, std::move(oracle), opt);
446 }
447
448 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, Index_ block_start, Index_ block_length, const Options& opt) const {
449 return populate_oracular_sparse(row, std::move(oracle), block_start, block_length, opt);
450 }
451
452 std::unique_ptr<OracularSparseExtractor<Value_, Index_> > sparse(bool row, std::shared_ptr<const Oracle<Index_> > oracle, VectorPtr<Index_> indices_ptr, const Options& opt) const {
453 return populate_oracular_sparse(row, std::move(oracle), std::move(indices_ptr), opt);
454 }
455};
456
457}
458
459#endif
Virtual class for a matrix of some numeric type.
Delayed subsetting of a matrix with unique indices.
Definition DelayedSubsetUnique.hpp:269
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubsetUnique.hpp:444
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, const Options &opt) const
Definition DelayedSubsetUnique.hpp:366
bool prefer_rows() const
Definition DelayedSubsetUnique.hpp:332
Index_ ncol() const
Definition DelayedSubsetUnique.hpp:316
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:374
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:448
double is_sparse_proportion() const
Definition DelayedSubsetUnique.hpp:328
bool is_sparse() const
Definition DelayedSubsetUnique.hpp:324
double prefer_rows_proportion() const
Definition DelayedSubsetUnique.hpp:336
DelayedSubsetUnique(std::shared_ptr< const Matrix< Value_, Index_ > > matrix, SubsetStorage_ subset, bool by_row, bool check=true)
Definition DelayedSubsetUnique.hpp:279
bool uses_oracle(bool row) const
Definition DelayedSubsetUnique.hpp:340
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:400
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:396
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const
Definition DelayedSubsetUnique.hpp:392
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense(bool row, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:370
std::unique_ptr< OracularSparseExtractor< Value_, Index_ > > sparse(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:452
Index_ nrow() const
Definition DelayedSubsetUnique.hpp:308
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, const Options &opt) const
Definition DelayedSubsetUnique.hpp:418
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, Index_ block_start, Index_ block_length, const Options &opt) const
Definition DelayedSubsetUnique.hpp:422
std::unique_ptr< OracularDenseExtractor< Value_, Index_ > > dense(bool row, std::shared_ptr< const Oracle< Index_ > > oracle, VectorPtr< Index_ > indices_ptr, const Options &opt) const
Definition DelayedSubsetUnique.hpp:426
Virtual class for a matrix.
Definition Matrix.hpp:59
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense_row() const
Definition Matrix.hpp:287
std::unique_ptr< MyopicDenseExtractor< Value_, Index_ > > dense_column() const
Definition Matrix.hpp:328
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse_column() const
Definition Matrix.hpp:537
std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse_row() const
Definition Matrix.hpp:496
Predict future access requests on the target dimension.
Definition Oracle.hpp:23
Copy data from one buffer to another.
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::shared_ptr< const std::vector< Index_ > > VectorPtr
Definition Matrix.hpp:26
typename std::conditional< oracle_, OracularDenseExtractor< Value_, Index_ >, MyopicDenseExtractor< Value_, Index_ > >::type DenseExtractor
Definition Extractor.hpp:273
Options for accessing data from a Matrix instance.
Definition Options.hpp:30