tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
convert_to_fragmented_sparse.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CONVERT_TO_FRAGMENTED_SPARSE_H
2#define TATAMI_CONVERT_TO_FRAGMENTED_SPARSE_H
3
4#include <memory>
5#include <vector>
6#include <cstddef>
7#include <optional>
8
10#include "convert_to_sparse_utils.hpp"
11
13#include "../utils/copy.hpp"
16
23namespace tatami {
24
35template<typename Value_, typename Index_>
40 FragmentedSparseContents(Index_ n) :
41 value(cast_Index_to_container_size<I<decltype(value)> >(n)),
43 {}
52 std::vector<std::vector<Value_> > value;
53
59 std::vector<std::vector<Index_> > index;
60};
61
78
82template<typename StoredValue_, typename StoredIndex_, typename InputValue_, typename InputIndex_>
83FragmentedSparseContents<StoredValue_, StoredIndex_> retrieve_fragmented_sparse_contents_consistent(
85 const bool row,
87) {
88 const InputIndex_ NR = matrix.nrow();
89 const InputIndex_ NC = matrix.ncol();
90 const InputIndex_ primary = (row ? NR : NC);
91 const InputIndex_ secondary = (row ? NC : NR);
92
94 auto& store_v = output.value;
95 auto& store_i = output.index;
96
97 if (matrix.is_sparse()) {
98 parallelize([&](const int, const InputIndex_ start, const InputIndex_ length) -> void {
99 auto wrk = consecutive_extractor<true>(matrix, row, start, length);
102
103 for (InputIndex_ p = start, pe = start + length; p < pe; ++p) {
104 const auto range = wrk->fetch(buffer_v.data(), buffer_i.data());
105 auto& sv = store_v[p];
106 auto& si = store_i[p];
107 sv.reserve(range.number);
108 si.reserve(range.number);
109
110 // We don't filter out structural non-zeros that have values of zero, for consistency with convert_to_compressed_sparse().
111 for (InputIndex_ i = 0; i < range.number; ++i) {
112 sv.push_back(range.value[i]);
113 si.push_back(range.index[i]);
114 }
115 }
116 }, primary, options.num_threads);
117
118 } else {
119 parallelize([&](const int, const InputIndex_ start, const InputIndex_ length) -> void {
120 auto wrk = consecutive_extractor<false>(matrix, row, start, length);
122
123 // Special conversion from dense to save ourselves from having to make
124 // indices that we aren't really interested in.
125 for (InputIndex_ p = start, pe = start + length; p < pe; ++p) {
126 const auto ptr = wrk->fetch(buffer_v.data());
127 auto& sv = store_v[p];
128 auto& si = store_i[p];
129
130 for (InputIndex_ s = 0; s < secondary; ++s) {
131 const auto val = ptr[s];
132 if (val) {
133 sv.push_back(val);
134 si.push_back(s);
135 }
136 }
137 }
138 }, primary, options.num_threads);
139 }
140
141 return output;
142}
159template<typename StoredValue_, typename StoredIndex_, typename InputValue_, typename InputIndex_>
162 const bool row,
164) {
165 if (row == matrix.prefer_rows()) {
166 return retrieve_fragmented_sparse_contents_consistent<StoredValue_, StoredIndex_>(matrix, row, options);
167 }
168
169 const InputIndex_ NR = matrix.nrow();
170 const InputIndex_ NC = matrix.ncol();
171 const InputIndex_ primary = (row ? NR : NC);
172 const InputIndex_ secondary = (row ? NC : NR);
173
174 if (!options.two_pass) {
175 // In the one-pass strategy, we load everything in a nice format first, then we transpose it in serial.
176 // This avoids messy reallocations when trying to expand vectors on an inconsistent dimension.
177 auto tmp = retrieve_fragmented_sparse_contents_consistent<StoredValue_, StoredIndex_>(matrix, !row, options);
178 auto primary_counts = create_container_of_Index_size<std::vector<InputIndex_> >(primary);
179 for (I<decltype(secondary)> s = 0; s < secondary; ++s) {
180 const auto& sec_indices = tmp.index[s];
181 const auto num = sec_indices.size();
182 for (I<decltype(num)> n = 0; n < num; ++n) {
183 primary_counts[sec_indices[n]] += 1; // addition must be space, this cannot exceed dimension extents.
184 }
185 }
186
188 for (InputIndex_ p = 0; p < primary; ++p) {
189 output.index[p].reserve(primary_counts[p]);
190 output.value[p].reserve(primary_counts[p]);
191 }
192
193 for (I<decltype(secondary)> s = 0; s < secondary; ++s) {
194 const auto& sec_values = tmp.value[s];
195 const auto& sec_indices = tmp.index[s];
196 const auto num = sec_indices.size();
197 for (I<decltype(num)> n = 0; n < num; ++n) {
198 const auto curp = sec_indices[n];
199 output.value[curp].push_back(sec_values[n]);
200 output.index[curp].push_back(s);
201 }
202 }
203
204 return output;
205 }
206
207 // In the two-pass strategy, we count the number of non-zeros first, then we fill it up in the second pass.
208 std::optional<std::vector<InputIndex_> > nnz_consistent;
209 auto nnz_inconsistent = create_container_of_Index_size<std::vector<InputIndex_> >(primary);
210 count_sparse_non_zeros_inconsistent(matrix, primary, secondary, row, nnz_inconsistent.data(), nnz_consistent, options.num_threads);
211
213 for (InputIndex_ p = 0; p < primary; ++p) {
214 output.index[p].reserve(nnz_inconsistent[p]);
215 output.value[p].reserve(nnz_inconsistent[p]);
216 }
217
218 fill_sparse_matrix_inconsistent(
219 matrix,
220 primary,
221 secondary,
222 row,
223 nnz_consistent,
224 /* sparse_main = */ [&](const InputIndex_ s, const SparseRange<InputValue_, InputIndex_>& range) -> void {
225 for (InputIndex_ i = 0; i < range.number; ++i) {
226 output.value[range.index[i]].push_back(range.value[i]);
227 output.index[range.index[i]].push_back(s);
228 }
229 },
230 /* dense_main = */ [&](const InputIndex_ s, const InputValue_* const ptr) -> void {
231 for (InputIndex_ p = 0; p < primary; ++p) {
232 const auto val = ptr[p];
233 if (val != 0) {
234 output.value[p].push_back(val);
235 output.index[p].push_back(s);
236 }
237 }
238 },
239 /* reduce = */ [&](const InputIndex_ s, const std::vector<InputValue_>& cur_values, const std::vector<InputIndex_>& cur_primary_indices) {
240 const auto cur_count = cur_values.size();
241 for (I<decltype(cur_count)> i = 0; i < cur_count; ++i) {
242 const auto primary = cur_primary_indices[i];
243 output.value[primary].push_back(cur_values[i]);
244 output.index[primary].push_back(s);
245 }
246 },
247 options.num_threads
248 );
249
250 return output;
251}
252
262 bool two_pass = false;
263
267 int num_threads = 1;
268};
269
285template<
286 typename Value_,
287 typename Index_,
288 typename StoredValue_ = Value_,
289 typename StoredIndex_ = Index_,
290 typename InputValue_,
291 typename InputIndex_
292>
293std::shared_ptr<Matrix<Value_, Index_> > convert_to_fragmented_sparse(
295 const bool row,
297{
299 matrix,
300 row,
301 [&]{
303 ropt.two_pass = options.two_pass;
304 ropt.num_threads = options.num_threads;
305 return ropt;
306 }()
307 );
308 return std::shared_ptr<Matrix<Value_, Index_> >(
310 Value_,
311 Index_,
312 std::vector<std::vector<StoredValue_> >,
313 std::vector<std::vector<StoredIndex_> >
314 >(
315 matrix.nrow(),
316 matrix.ncol(),
317 std::move(frag.value),
318 std::move(frag.index),
319 row,
320 []{
321 FragmentedSparseMatrixOptions fopt;
322 fopt.check = false; // no need for checks, as we guarantee correctness.
323 return fopt;
324 }()
325 )
326 );
327}
328
332// Backwards compatbility.
333template<typename Value_, typename Index_, typename StoredValue_ = Value_, typename StoredIndex_ = Index_, typename InputValue_, typename InputIndex_>
334std::shared_ptr<Matrix<Value_, Index_> > convert_to_fragmented_sparse(const Matrix<InputValue_, InputIndex_>* matrix, bool row, int threads = 1) {
336 *matrix,
337 row,
338 [&]{
339 ConvertToFragmentedSparseOptions opt;
340 opt.num_threads = threads;
341 return opt;
342 }()
343 );
344}
345
346template<typename StoredValue_, typename StoredIndex_, typename InputValue_, typename InputIndex_>
347FragmentedSparseContents<StoredValue_, StoredIndex_> retrieve_fragmented_sparse_contents(const Matrix<InputValue_, InputIndex_>* matrix, bool row, int threads = 1) {
349 *matrix,
350 row,
351 [&]{
352 RetrieveFragmentedSparseContentsOptions opt;
353 opt.num_threads = threads;
354 return opt;
355 }()
356 );
357}
358
359template <bool row_, typename StoredValue_, typename StoredIndex_, typename InputValue_, typename InputIndex_>
360FragmentedSparseContents<StoredValue_, StoredIndex_> retrieve_fragmented_sparse_contents(const Matrix<InputValue_, InputIndex_>* matrix, int threads = 1) {
362}
363
364template <bool row_, typename Value_, typename Index_, typename StoredValue_ = Value_, typename StoredIndex_ = Index_, typename InputValue_, typename InputIndex_>
365std::shared_ptr<Matrix<Value_, Index_> > convert_to_fragmented_sparse(const Matrix<InputValue_, InputIndex_>* matrix, int threads = 1) {
367}
372}
373
374#endif
Fragmented sparse matrix representation.
Convert index type to container size.
Fragmented sparse matrix representation.
Definition FragmentedSparseMatrix.hpp:570
Virtual class for a matrix.
Definition Matrix.hpp:59
virtual Index_ ncol() const =0
virtual Index_ nrow() const =0
virtual bool prefer_rows() const =0
virtual bool is_sparse() const =0
Templated construction of a new consecutive extractor.
Copy data from one buffer to another.
Flexible representations for matrix data.
Definition Extractor.hpp:15
FragmentedSparseContents< StoredValue_, StoredIndex_ > retrieve_fragmented_sparse_contents(const Matrix< InputValue_, InputIndex_ > &matrix, const bool row, const RetrieveFragmentedSparseContentsOptions &options)
Definition convert_to_fragmented_sparse.hpp:160
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Definition parallelize.hpp:58
I< decltype(std::declval< Container_ >().size())> cast_Index_to_container_size(const Index_ x)
Definition Index_to_container.hpp:65
std::shared_ptr< Matrix< Value_, Index_ > > convert_to_fragmented_sparse(const Matrix< InputValue_, InputIndex_ > &matrix, const bool row, const ConvertToFragmentedSparseOptions &options)
Definition convert_to_fragmented_sparse.hpp:293
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
Definition Index_to_container.hpp:82
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35
Parallelized iteration over a tatami::Matrix.
Options for convert_to_fragmented_sparse().
Definition convert_to_fragmented_sparse.hpp:256
bool two_pass
Definition convert_to_fragmented_sparse.hpp:262
int num_threads
Definition convert_to_fragmented_sparse.hpp:267
Fragmented sparse contents.
Definition convert_to_fragmented_sparse.hpp:36
std::vector< std::vector< Value_ > > value
Definition convert_to_fragmented_sparse.hpp:52
std::vector< std::vector< Index_ > > index
Definition convert_to_fragmented_sparse.hpp:59
Options for retrieve_fragmented_sparse_contents().
Definition convert_to_fragmented_sparse.hpp:65
int num_threads
Definition convert_to_fragmented_sparse.hpp:76
bool two_pass
Definition convert_to_fragmented_sparse.hpp:71
A range of a sparse vector.
Definition SparseRange.hpp:32
const Value_ * value
Definition SparseRange.hpp:56
Index_ number
Definition SparseRange.hpp:50
const Index_ * index
Definition SparseRange.hpp:62