tatami_layered
Create layered sparse matrices for tatami
Loading...
Searching...
No Matches
convert_to_layered_sparse.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CONVERT_TO_LAYERED_SPARSE_HPP
2#define TATAMI_CONVERT_TO_LAYERED_SPARSE_HPP
3
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <vector>

#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

#include "utils.hpp"
14
20namespace tatami_layered {
21
// Build a layered sparse matrix by scanning 'mat' one row at a time.
// Columns are partitioned into chunks of 'chunk_size'; within each (chunk, row)
// pair, non-zero values are binned by categorize() so that each row of each
// chunk can be stored at the smallest sufficient integer width (8/16/32-bit).
// Two passes are made: one to count non-zeros and find the widest category per
// (chunk, row), and one to fill the allocated stores. Work is parallelized
// over rows across 'nthreads' workers.
//
// NOTE(review): this listing was extracted from generated documentation and
// several declaration lines are missing (original lines 31-41, 44, 46, 49, 51,
// 57, 61-62, 80, 113-114, 117). They presumably declared the fetch buffers
// (dbuffer/ibuffer), the per-(chunk, row) counters (max_per_chunk,
// num_per_chunk), the width-specific stores (store8/16/32, identities8/16/32),
// the assignment maps (assigned_category, assigned_position) and
// output_positions -- confirm against the actual header.
25template<typename ColIndex_, typename ValueOut_ = double, typename IndexOut_ = int, typename ValueIn_, typename IndexIn_>
26std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tatami::Matrix<ValueIn_, IndexIn_>& mat, const IndexIn_ chunk_size, const int nthreads) {
27 const auto NR = mat.nrow(), NC = mat.ncol();
// Trailing partial chunk (0 when NC divides evenly); at least one chunk even
// for an empty matrix.
28 const IndexIn_ leftovers = NC % chunk_size;
29 const IndexIn_ nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
30
34
38
41
42 // First pass to define the allocations.
43 {
45 for (auto& x : max_per_chunk) {
47 }
48
50 for (auto& x : num_per_chunk) {
52 }
53
// Sparse input: only the structural non-zeros of each row are visited.
// Index order within a row does not matter for counting, hence
// sparse_ordered_index = false.
54 if (mat.sparse()) {
55 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
56 auto ext = tatami::consecutive_extractor<true>(mat, true, start, length, [&]{
58 opt.sparse_ordered_index = false;
59 return opt;
60 }());
63
64 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
65 const auto range = ext->fetch(r, dbuffer.data(), ibuffer.data());
66 for (IndexIn_ i = 0; i < range.number; ++i) {
// Skip explicit zeros; track the widest category and the
// non-zero count for this (chunk, row).
67 if (range.value[i]) {
68 const auto chunk = range.index[i] / chunk_size;
69 const auto cat = categorize(range.value[i]);
70 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cat);
71 ++num_per_chunk[chunk][r];
72 }
73 }
74 }
75 }, NR, nthreads);
76
// Dense input: scan every column of each row.
77 } else {
78 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
79 auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
81
82 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
83 auto ptr = ext->fetch(r, dbuffer.data());
84 for (IndexIn_ c = 0; c < NC; ++c) {
85 if (ptr[c]) {
86 const auto chunk = c / chunk_size;
87 const auto cat = categorize(ptr[c]);
88 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cat);
89 ++num_per_chunk[chunk][r];
90 }
91 }
92 }
93 }, NR, nthreads);
94 }
95
// Size the width-specific stores from the collected counts/categories and
// record, for each (chunk, row), which store it was assigned to and where.
96 allocate_rows(
97 max_per_chunk,
98 num_per_chunk,
99 identities8,
100 identities16,
101 identities32,
102 store8,
103 store16,
104 store32,
105 assigned_category,
106 assigned_position
107 );
108 }
109
110 // Second pass to actually fill the vectors.
111 {
112 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
115
116 if (mat.sparse()) {
118 auto ext = tatami::consecutive_extractor<true>(mat, true, start, length);
119
120 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
// Reset the write cursor of every chunk to this row's
// allocated start position.
121 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
122 output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
123 }
124
125 auto range = ext->fetch(r, dbuffer.data(), ibuffer.data());
126 for (IndexIn_ i = 0; i < range.number; ++i) {
127 if (range.value[i]) {
// 'col' is the column index relative to its chunk, so it
// fits in ColIndex_ (chunk_size is checked by the caller).
128 const IndexIn_ chunk = range.index[i] / chunk_size;
129 const IndexIn_ col = range.index[i] % chunk_size;
130 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, range.value[i], output_positions[chunk]++);
131 }
132 }
133 }
134
135 } else {
136 auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
137
138 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
139 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
140 output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
141 }
142
143 auto ptr = ext->fetch(r, dbuffer.data());
144 for (IndexIn_ c = 0; c < NC; ++c) {
145 if (ptr[c]) {
146 const IndexIn_ chunk = c / chunk_size;
147 const IndexIn_ col = c % chunk_size;
148 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, ptr[c], output_positions[chunk]++);
149 }
150 }
151 }
152 }
153
154 }, NR, nthreads);
155 }
156
// Stitch the per-width stores into the final tatami::Matrix.
157 return consolidate_matrices<ValueOut_, IndexOut_>(
158 identities8,
159 identities16,
160 identities32,
161 std::move(store8),
162 std::move(store16),
163 std::move(store32),
164 NR,
165 chunk_size,
166 leftovers
167 );
168}
169
// Allocate one vector per chunk, each presumably resized to NR entries; used
// to hold per-(chunk, row) category maxima and non-zero counts.
// NOTE(review): original lines 172 and 174 (the creation of 'output' and the
// per-chunk resize inside the loop) were lost in extraction -- confirm against
// the actual header.
170template<typename Value_, typename IndexIn_>
171std::vector<std::vector<Value_> > convert_by_column_allocate_store_per_chunk(const IndexIn_ nchunks, const IndexIn_ NR) {
173 for (auto& x : output) {
175 }
176 return output;
177};
178
179template<typename Value_, typename IndexIn_>
180std::vector<std::vector<Value_> >& convert_by_column_get_store_per_chunk(
181 int thread,
182 std::vector<std::vector<Value_> >& base,
183 std::optional<std::vector<std::vector<Value_> > >& tmp,
184 const IndexIn_ nchunks,
185 const IndexIn_ NR
186) {
187 if (thread) {
188 tmp = convert_by_column_allocate_store_per_chunk<Value_>(nchunks, NR);
189 return *tmp;
190 } else {
191 return base;
192 }
193}
194
// Stash a worker thread's thread-local store into the 'collected' array so the
// caller can merge it into the shared base store. Thread 0 has no thread-local
// store (it wrote directly into the base), so it is a no-op; thread t > 0 moves
// its store into slot t - 1.
template<typename Value_>
void convert_by_column_save_store_per_chunk(
    int thread,
    std::optional<std::vector<std::vector<Value_> > >& tmp,
    std::vector<std::vector<std::vector<Value_> > >& collected
) {
    if (thread == 0) {
        return;
    }
    collected[thread - 1] = std::move(*tmp);
}
205
// Build a layered sparse matrix by scanning 'mat' one column at a time.
// Same layering scheme as convert_by_row(): columns are partitioned into
// chunks of 'chunk_size' and non-zeros are binned by categorize() into
// 8/16/32-bit stores. Because a column touches every row, the first pass
// accumulates counts into thread-local copies of the per-chunk stores
// (thread 0 uses the shared base directly) and merges them afterwards.
//
// NOTE(review): this listing was extracted from generated documentation and
// several declaration lines are missing (original lines 212-222, 238-239, 269,
// 324, 332). They presumably declared the fetch buffers (dbuffer/ibuffer), the
// width-specific stores (store8/16/32, identities8/16/32), the assignment maps
// (assigned_category, assigned_position) and output_positions -- confirm
// against the actual header.
206template<typename ColIndex_, typename ValueOut_ = double, typename IndexOut_ = int, typename ValueIn_, typename IndexIn_>
207std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const tatami::Matrix<ValueIn_, IndexIn_>& mat, const IndexIn_ chunk_size, const int nthreads) {
208 const auto NR = mat.nrow(), NC = mat.ncol();
// Trailing partial chunk (0 when NC divides evenly); at least one chunk even
// for an empty matrix.
209 const IndexIn_ leftovers = NC % chunk_size;
210 const IndexIn_ nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
211
215
219
222
223 // First pass to define the allocations.
224 {
// Shared base stores plus one slot per extra worker for thread-local copies.
225 auto max_per_chunk = convert_by_column_allocate_store_per_chunk<Category>(nchunks, NR);
226 auto max_per_chunk_threaded = sanisizer::create<std::vector<std::vector<std::vector<Category> > > >(nthreads - 1);
227 auto num_per_chunk = convert_by_column_allocate_store_per_chunk<IndexIn_>(nchunks, NR);
228 auto num_per_chunk_threaded = sanisizer::create<std::vector<std::vector<std::vector<IndexIn_> > > >(nthreads - 1);
// Number of workers that actually ran, as reported by tatami::parallelize;
// bounds the merge loop below.
229 int num_used;
230
231 if (mat.sparse()) {
232 num_used = tatami::parallelize([&](const int t, const IndexIn_ start, const IndexIn_ length) -> void {
// Row order within a column does not matter for counting, hence
// sparse_ordered_index = false.
233 auto ext = tatami::consecutive_extractor<true>(mat, false, start, length, [&]{
234 tatami::Options opt;
235 opt.sparse_ordered_index = false;
236 return opt;
237 }());
240
// Thread 0 accumulates into the shared stores; others into
// thread-local copies that are saved and merged later.
241 std::optional<std::vector<std::vector<Category> > > max_tmp;
242 auto& cur_max_per_chunk = convert_by_column_get_store_per_chunk(t, max_per_chunk, max_tmp, nchunks, NR);
243 std::optional<std::vector<std::vector<IndexIn_> > > num_tmp;
244 auto& cur_num_per_chunk = convert_by_column_get_store_per_chunk(t, num_per_chunk, num_tmp, nchunks, NR);
245
246 for (IndexIn_ c = start, end = start + length; c < end; ++c) {
247 const auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
248 const auto chunk = c / chunk_size;
249 auto& max_vec = cur_max_per_chunk[chunk];
250 auto& num_vec = cur_num_per_chunk[chunk];
251
252 for (IndexIn_ i = 0; i < range.number; ++i) {
253 if (range.value[i]) {
254 const auto cat = categorize(range.value[i]);
255 const auto r = range.index[i];
256 max_vec[r] = std::max(max_vec[r], cat);
257 ++num_vec[r];
258 }
259 }
260 }
261
262 convert_by_column_save_store_per_chunk(t, max_tmp, max_per_chunk_threaded);
263 convert_by_column_save_store_per_chunk(t, num_tmp, num_per_chunk_threaded);
264 }, NC, nthreads);
265
266 } else {
267 num_used = tatami::parallelize([&](const int t, const IndexIn_ start, const IndexIn_ length) -> void {
268 auto ext = tatami::consecutive_extractor<false>(mat, false, start, length);
270
271 std::optional<std::vector<std::vector<Category> > > max_tmp;
272 auto& cur_max_per_chunk = convert_by_column_get_store_per_chunk(t, max_per_chunk, max_tmp, nchunks, NR);
273 std::optional<std::vector<std::vector<IndexIn_> > > num_tmp;
274 auto& cur_num_per_chunk = convert_by_column_get_store_per_chunk(t, num_per_chunk, num_tmp, nchunks, NR);
275
276 for (IndexIn_ c = start, end = start + length; c < end; ++c) {
277 const auto ptr = ext->fetch(c, dbuffer.data());
278 const auto chunk = c / chunk_size;
279 auto& max_vec = cur_max_per_chunk[chunk];
280 auto& num_vec = cur_num_per_chunk[chunk];
281
282 for (IndexIn_ r = 0; r < NR; ++r) {
283 if (ptr[r]) {
284 auto cat = categorize(ptr[r]);
285 max_vec[r] = std::max(max_vec[r], cat);
286 ++num_vec[r];
287 }
288 }
289 }
290
291 convert_by_column_save_store_per_chunk(t, max_tmp, max_per_chunk_threaded);
292 convert_by_column_save_store_per_chunk(t, num_tmp, num_per_chunk_threaded);
293 }, NC, nthreads);
294 }
295
// Merge thread-local accumulators (threads 1..num_used-1) into the base.
296 for (int t = 1; t < num_used; ++t) {
297 const auto& cur_max_per_chunk = max_per_chunk_threaded[t - 1];
298 const auto& cur_num_per_chunk = num_per_chunk_threaded[t - 1];
299 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
300 for (IndexIn_ r = 0; r < NR; ++r) {
301 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cur_max_per_chunk[chunk][r]);
302 num_per_chunk[chunk][r] += cur_num_per_chunk[chunk][r];
303 }
304 }
305 }
306
// Size the width-specific stores from the merged counts/categories and
// record, for each (chunk, row), which store it was assigned to and where.
307 allocate_rows(
308 max_per_chunk,
309 num_per_chunk,
310 identities8,
311 identities16,
312 identities32,
313 store8,
314 store16,
315 store32,
316 assigned_category,
317 assigned_position
318 );
319 }
320
321 // Second pass to actually fill the vectors.
322 {
// Parallelized over ROW blocks: each worker scans every column but only
// fills values whose row falls in its [start, start + length) block, so no
// two workers write to the same output position.
323 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
// Per-chunk write cursors for each row in this worker's block.
325 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
326 tatami::resize_container_to_Index_size(output_positions[chunk], length);
327 for (IndexIn_ r = 0; r < length; ++r) {
328 output_positions[chunk][r] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r + start);
329 }
330 }
331
333
334 if (mat.sparse()) {
// The extra (start, length) arguments restrict extraction to this
// worker's row block.
336 auto ext = tatami::consecutive_extractor<true>(mat, false, static_cast<IndexIn_>(0), NC, start, length);
337
338 for (IndexIn_ c = 0; c < NC; ++c) {
339 const auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
340 const auto chunk = c / chunk_size;
// Column index relative to its chunk, so it fits in ColIndex_.
341 const IndexIn_ col = c % chunk_size;
342 auto& outpos = output_positions[chunk];
343
344 for (IndexIn_ i = 0; i < range.number; ++i) {
345 if (range.value[i]) {
346 const auto r = range.index[i];
347 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, range.value[i], outpos[r - start]++);
348 }
349 }
350 }
351
352 } else {
353 auto ext = tatami::consecutive_extractor<false>(mat, false, static_cast<IndexIn_>(0), NC, start, length);
354
355 for (IndexIn_ c = 0; c < NC; ++c) {
356 const auto ptr = ext->fetch(c, dbuffer.data());
357 const auto chunk = c / chunk_size;
358 const IndexIn_ col = c % chunk_size;
359 auto& outpos = output_positions[chunk];
360
361 for (IndexIn_ r = 0; r < NR; ++r) {
362 if (ptr[r]) {
363 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, ptr[r], outpos[r - start]++);
364 }
365 }
366 }
367 }
368
369 }, NR, nthreads);
370 }
371
// Stitch the per-width stores into the final tatami::Matrix.
372 return consolidate_matrices<ValueOut_, IndexOut_>(
373 identities8,
374 identities16,
375 identities32,
376 std::move(store8),
377 std::move(store16),
378 std::move(store32),
379 NR,
380 chunk_size,
381 leftovers
382 );
383}
// NOTE(review): the opening 'struct ConvertToLayeredSparseOptions {' and its
// documentation block (original lines 384-395) were lost in extraction; the
// members below belong to that struct -- confirm against the actual header.
//
// Number of columns per chunk. Presumably each chunk's relative column index
// must fit into the ColumnIndex_ type (validated via check_chunk_size() in
// convert_to_layered_sparse()) -- TODO confirm. Default is 65536, capped to
// the representable range of std::size_t by sanisizer.
396 std::size_t chunk_size = sanisizer::cap<std::size_t>(65536);
397
// Number of worker threads, forwarded to tatami::parallelize().
402 int num_threads = 1;
403};
404
434template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = std::uint16_t, typename ValueIn_, typename IndexIn_>
435std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>& mat, const ConvertToLayeredSparseOptions& options) {
436 const IndexIn_ chunk_size = check_chunk_size<IndexIn_, ColumnIndex_>(options.chunk_size);
437 if (mat.prefer_rows()) {
438 return convert_by_row<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, options.num_threads);
439 } else {
440 return convert_by_column<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, options.num_threads);
441 }
442}
443
447// Provided for back-compatibility.
448template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = std::uint16_t, typename ValueIn_, typename IndexIn_>
449std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>& mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
450 return convert_to_layered_sparse(mat, [&]{
451 ConvertToLayeredSparseOptions opt;
452 opt.chunk_size = chunk_size;
453 opt.num_threads = num_threads;
454 return opt;
455 }());
456}
457
458template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = std::uint16_t, typename ValueIn_, typename IndexIn_>
459std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>* mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
461}
466}
467
468#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Create layered sparse matrices for tatami.
Definition convert_to_layered_sparse.hpp:20
std::shared_ptr< tatami::Matrix< ValueOut_, IndexOut_ > > convert_to_layered_sparse(const tatami::Matrix< ValueIn_, IndexIn_ > &mat, const ConvertToLayeredSparseOptions &options)
Definition convert_to_layered_sparse.hpp:435
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
Options for convert_to_layered_sparse().
Definition convert_to_layered_sparse.hpp:391
int num_threads
Definition convert_to_layered_sparse.hpp:402
std::size_t chunk_size
Definition convert_to_layered_sparse.hpp:396