tatami_layered
Create layered sparse matrices for tatami
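As a quick orientation before the source listing, a minimal usage sketch is shown below. The include path, the tatami::DenseRowMatrix input and the small count values are illustrative assumptions; only convert_to_layered_sparse() and ConvertToLayeredSparseOptions come from this header.

#include "tatami/tatami.hpp"
#include "tatami_layered/convert_to_layered_sparse.hpp" // include path is an assumption

#include <memory>
#include <utility>
#include <vector>

int main() {
    // Illustrative input: a 4 x 3 dense count matrix wrapped in a tatami interface.
    std::vector<double> counts{ 0, 1, 200, 70000, 0, 2, 3, 0, 0, 5, 300, 1 };
    tatami::DenseRowMatrix<double, int, std::vector<double> > mat(4, 3, std::move(counts));

    // Convert to a layered representation via the options struct defined in this header.
    tatami_layered::ConvertToLayeredSparseOptions opt;
    opt.num_threads = 2;
    std::shared_ptr<tatami::Matrix<double, int> > layered =
        tatami_layered::convert_to_layered_sparse(mat, opt);

    return (layered->nrow() == 4 && layered->ncol() == 3) ? 0 : 1;
}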
convert_to_layered_sparse.hpp
1#ifndef TATAMI_CONVERT_TO_LAYERED_SPARSE_HPP
2#define TATAMI_CONVERT_TO_LAYERED_SPARSE_HPP
3
4#include <cstdint>
5#include <vector>
6#include <memory>
7#include <limits>
8#include <algorithm>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
13#include "utils.hpp"
14
20namespace tatami_layered {
21
25template<typename ColIndex_, typename ValueOut_ = double, typename IndexOut_ = int, typename ValueIn_, typename IndexIn_>
26std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_row(const tatami::Matrix<ValueIn_, IndexIn_>& mat, const IndexIn_ chunk_size, const int nthreads) {
27 const auto NR = mat.nrow(), NC = mat.ncol();
28 const IndexIn_ leftovers = NC % chunk_size;
29 const IndexIn_ nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
30
34
38
41
42 // First pass to define the allocations.
43 {
45 for (auto& x : max_per_chunk) {
47 }
48
50 for (auto& x : num_per_chunk) {
52 }
53
54 if (mat.sparse()) {
55 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
56 auto ext = tatami::consecutive_extractor<true>(mat, true, start, length, [&]{
57 tatami::Options opt;
58 opt.sparse_ordered_index = false;
59 return opt;
60 }());
63
64 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
65 const auto range = ext->fetch(r, dbuffer.data(), ibuffer.data());
66 for (IndexIn_ i = 0; i < range.number; ++i) {
67 if (range.value[i]) {
68 const auto chunk = range.index[i] / chunk_size;
69 const auto cat = categorize(range.value[i]);
70 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cat);
71 ++num_per_chunk[chunk][r];
72 }
73 }
74 }
75 }, NR, nthreads);
76
77 } else {
78 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
79 auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
81
82 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
83 auto ptr = ext->fetch(r, dbuffer.data());
84 for (IndexIn_ c = 0; c < NC; ++c) {
85 if (ptr[c]) {
86 const auto chunk = c / chunk_size;
87 const auto cat = categorize(ptr[c]);
88 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cat);
89 ++num_per_chunk[chunk][r];
90 }
91 }
92 }
93 }, NR, nthreads);
94 }
95
96 allocate_rows(
97 max_per_chunk,
98 num_per_chunk,
99 identities8,
100 identities16,
101 identities32,
102 store8,
103 store16,
104 store32,
105 assigned_category,
106 assigned_position
107 );
108 }
109
110 // Second pass to actually fill the vectors.
111 {
112 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
115
116 if (mat.sparse()) {
118 auto ext = tatami::consecutive_extractor<true>(mat, true, start, length);
119
120 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
121 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
122 output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
123 }
124
125 auto range = ext->fetch(r, dbuffer.data(), ibuffer.data());
126 for (IndexIn_ i = 0; i < range.number; ++i) {
127 if (range.value[i]) {
128 const IndexIn_ chunk = range.index[i] / chunk_size;
129 const IndexIn_ col = range.index[i] % chunk_size;
130 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, range.value[i], output_positions[chunk]++);
131 }
132 }
133 }
134
135 } else {
136 auto ext = tatami::consecutive_extractor<false>(mat, true, start, length);
137
138 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
139 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
140 output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
141 }
142
143 auto ptr = ext->fetch(r, dbuffer.data());
144 for (IndexIn_ c = 0; c < NC; ++c) {
145 if (ptr[c]) {
146 const IndexIn_ chunk = c / chunk_size;
147 const IndexIn_ col = c % chunk_size;
148 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, ptr[c], output_positions[chunk]++);
149 }
150 }
151 }
152 }
153
154 }, NR, nthreads);
155 }
156
157 return consolidate_matrices<ValueOut_, IndexOut_>(
158 identities8,
159 identities16,
160 identities32,
161 std::move(store8),
162 std::move(store16),
163 std::move(store32),
164 NR,
165 chunk_size,
166 leftovers
167 );
168}
169
170template<typename ColIndex_, typename ValueOut_ = double, typename IndexOut_ = int, typename ValueIn_, typename IndexIn_>
171std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(const tatami::Matrix<ValueIn_, IndexIn_>& mat, const IndexIn_ chunk_size, const int nthreads) {
172 const auto NR = mat.nrow(), NC = mat.ncol();
173 const IndexIn_ leftovers = NC % chunk_size;
174 const IndexIn_ nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
175
179
183
186
187 // First pass to define the allocations.
188 {
189 auto max_per_chunk_threaded = sanisizer::create<std::vector<std::vector<std::vector<Category> > > >(nthreads);
190 for (auto& max_per_chunk : max_per_chunk_threaded) {
192 for (auto& x : max_per_chunk) {
194 }
195 }
196
197 auto num_per_chunk_threaded = sanisizer::create<std::vector<std::vector<std::vector<IndexIn_> > > >(nthreads);
198 for (auto& num_per_chunk : num_per_chunk_threaded) {
200 for (auto& x : num_per_chunk) {
202 }
203 }
204
205 if (mat.sparse()) {
206 tatami::parallelize([&](const int t, const IndexIn_ start, const IndexIn_ length) -> void {
207 auto ext = tatami::consecutive_extractor<true>(mat, false, start, length, [&]{
208 tatami::Options opt;
209 opt.sparse_ordered_index = false;
210 return opt;
211 }());
214
215 auto& max_per_chunk = max_per_chunk_threaded[t];
216 auto& num_per_chunk = num_per_chunk_threaded[t];
217
218 for (IndexIn_ c = start, end = start + length; c < end; ++c) {
219 const auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
220 const auto chunk = c / chunk_size;
221 auto& max_vec = max_per_chunk[chunk];
222 auto& num_vec = num_per_chunk[chunk];
223
224 for (IndexIn_ i = 0; i < range.number; ++i) {
225 if (range.value[i]) {
226 const auto cat = categorize(range.value[i]);
227 const auto r = range.index[i];
228 max_vec[r] = std::max(max_vec[r], cat);
229 ++num_vec[r];
230 }
231 }
232 }
233 }, NC, nthreads);
234
235 } else {
236 tatami::parallelize([&](const int t, const IndexIn_ start, const IndexIn_ length) -> void {
237 auto ext = tatami::consecutive_extractor<false>(mat, false, start, length);
239
240 auto& max_per_chunk = max_per_chunk_threaded[t];
241 auto& num_per_chunk = num_per_chunk_threaded[t];
242
243 for (IndexIn_ c = start, end = start + length; c < end; ++c) {
244 const auto ptr = ext->fetch(c, dbuffer.data());
245 const auto chunk = c / chunk_size;
246 auto& max_vec = max_per_chunk[chunk];
247 auto& num_vec = num_per_chunk[chunk];
248
249 for (IndexIn_ r = 0; r < NR; ++r) {
250 if (ptr[r]) {
251 auto cat = categorize(ptr[r]);
252 max_vec[r] = std::max(max_vec[r], cat);
253 ++num_vec[r];
254 }
255 }
256 }
257 }, NC, nthreads);
258 }
259
262
263 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
264 // Assume we have at least one thread!
265 max_per_chunk[chunk].swap(max_per_chunk_threaded[0][chunk]);
266 num_per_chunk[chunk].swap(num_per_chunk_threaded[0][chunk]);
267
268 for (int t = 1; t < nthreads; ++t) {
269 for (IndexIn_ r = 0; r < NR; ++r) {
270 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], max_per_chunk_threaded[t][chunk][r]);
271 num_per_chunk[chunk][r] += num_per_chunk_threaded[t][chunk][r];
272 }
273 }
274 }
275
276 allocate_rows(
277 max_per_chunk,
278 num_per_chunk,
279 identities8,
280 identities16,
281 identities32,
282 store8,
283 store16,
284 store32,
285 assigned_category,
286 assigned_position
287 );
288 }
289
290 // Second pass to actually fill the vectors.
291 {
292 tatami::parallelize([&](const int, const IndexIn_ start, const IndexIn_ length) -> void {
294 for (I<decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
295 tatami::resize_container_to_Index_size(output_positions[chunk], length);
296 for (IndexIn_ r = 0; r < length; ++r) {
297 output_positions[chunk][r] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r + start);
298 }
299 }
300
302
303 if (mat.sparse()) {
305 auto ext = tatami::consecutive_extractor<true>(mat, false, static_cast<IndexIn_>(0), NC, start, length);
306
307 for (IndexIn_ c = 0; c < NC; ++c) {
308 const auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
309 const auto chunk = c / chunk_size;
310 const IndexIn_ col = c % chunk_size;
311 auto& outpos = output_positions[chunk];
312
313 for (IndexIn_ i = 0; i < range.number; ++i) {
314 if (range.value[i]) {
315 const auto r = range.index[i];
316 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, range.value[i], outpos[r - start]++);
317 }
318 }
319 }
320
321 } else {
322 auto ext = tatami::consecutive_extractor<false>(mat, false, static_cast<IndexIn_>(0), NC, start, length);
323
324 for (IndexIn_ c = 0; c < NC; ++c) {
325 const auto ptr = ext->fetch(c, dbuffer.data());
326 const auto chunk = c / chunk_size;
327 const IndexIn_ col = c % chunk_size;
328 auto& outpos = output_positions[chunk];
329
330 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
331 if (ptr[r - start]) {
332 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, ptr[r - start], outpos[r - start]++);
333 }
334 }
335 }
336 }
337
338 }, NR, nthreads);
339 }
340
341 return consolidate_matrices<ValueOut_, IndexOut_>(
342 identities8,
343 identities16,
344 identities32,
345 std::move(store8),
346 std::move(store16),
347 std::move(store32),
348 NR,
349 chunk_size,
350 leftovers
351 );
352}
360struct ConvertToLayeredSparseOptions {
365 std::size_t chunk_size = sanisizer::cap<std::size_t>(65536);
366
371 int num_threads = 1;
372};
373
403template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = std::uint16_t, typename ValueIn_, typename IndexIn_>
404std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>& mat, const ConvertToLayeredSparseOptions& options) {
405 const IndexIn_ chunk_size = check_chunk_size<IndexIn_, ColumnIndex_>(options.chunk_size);
406 if (mat.prefer_rows()) {
407 return convert_by_row<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, options.num_threads);
408 } else {
409 return convert_by_column<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, options.num_threads);
410 }
411}
412
416// Provided for back-compatibility.
417template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = std::uint16_t, typename ValueIn_, typename IndexIn_>
418std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>& mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
419 return convert_to_layered_sparse(mat, [&]{
420 ConvertToLayeredSparseOptions opt;
421 opt.chunk_size = chunk_size;
422 opt.num_threads = num_threads;
423 return opt;
424 }());
425}
426
427template<typename ValueOut_ = double, typename IndexOut_ = int, typename ColumnIndex_ = std::uint16_t, typename ValueIn_, typename IndexIn_>
428std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_to_layered_sparse(const tatami::Matrix<ValueIn_, IndexIn_>* mat, IndexIn_ chunk_size = 65536, int num_threads = 1) {
429 return convert_to_layered_sparse<ValueOut_, IndexOut_, ColumnIndex_>(*mat, chunk_size, num_threads);
430}
435}
436
437#endif
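A further sketch of the alternatives provided by this header: the back-compatible overload that takes plain arguments, and an explicit choice of the ColumnIndex_ template parameter. The helper function name is hypothetical, and the assumption that chunk_size must fit into ColumnIndex_ is inferred from the call to check_chunk_size() above.

#include "tatami_layered/convert_to_layered_sparse.hpp" // include path is an assumption
#include <cstdint>
#include <memory>

// Hypothetical helper: convert an existing count matrix using 8-bit intra-chunk column indices.
std::shared_ptr<tatami::Matrix<double, int> > convert_with_narrow_chunks(const tatami::Matrix<double, int>& mat) {
    // The back-compatible overload above accepts the same settings as plain arguments, e.g.:
    //   tatami_layered::convert_to_layered_sparse(mat, /* chunk_size = */ 65536, /* num_threads = */ 4);

    // With the options struct, the intra-chunk column index type can be narrowed; the chunk size
    // is reduced to 256 on the assumption that check_chunk_size() requires it to fit in ColumnIndex_.
    tatami_layered::ConvertToLayeredSparseOptions opt;
    opt.chunk_size = 256;
    opt.num_threads = 4;
    return tatami_layered::convert_to_layered_sparse<double, int, std::uint8_t>(mat, opt);
}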