tatami
C++ API for different matrix representations
convert_to_dense.hpp
#ifndef TATAMI_CONVERT_TO_DENSE_H
#define TATAMI_CONVERT_TO_DENSE_H

#include "./DenseMatrix.hpp"
#include "./transpose.hpp"

#include "../utils/consecutive_extractor.hpp"
#include "../utils/parallelize.hpp"
#include "../utils/copy.hpp"

#include <memory>
#include <vector>
#include <cstddef>
#include <algorithm>
#include <type_traits>

namespace tatami {

/**
 * @brief Options for `convert_to_dense()`.
 */
struct ConvertToDenseOptions {
    /**
     * Number of threads to use, for parallelization via `parallelize()`.
     */
    int num_threads = 1;
};

/**
 * Convert a `tatami::Matrix` into a dense array.
 *
 * @param matrix The input matrix.
 * @param row_major Whether to fill `store` in row-major order.
 * @param[out] store Pointer to an array of length equal to the product of the dimensions of `matrix`.
 * On output, this contains the values of `matrix` in the requested order.
 * @param options Further options.
 */
template <typename StoredValue_, typename InputValue_, typename InputIndex_>
void convert_to_dense(const Matrix<InputValue_, InputIndex_>& matrix, bool row_major, StoredValue_* store, const ConvertToDenseOptions& options) {
    InputIndex_ NR = matrix.nrow();
    InputIndex_ NC = matrix.ncol();
    bool pref_rows = matrix.prefer_rows();
    std::size_t primary = (pref_rows ? NR : NC);
    std::size_t secondary = (pref_rows ? NC : NR);

    if (row_major == pref_rows) {
        constexpr bool same_type = std::is_same<InputValue_, StoredValue_>::value;
        parallelize([&](int, std::size_t start, std::size_t length) -> void {
            std::vector<InputValue_> temp(same_type ? 0 : secondary);
            auto wrk = consecutive_extractor<false, InputValue_, InputIndex_>(matrix, pref_rows, start, length);

            for (decltype(length) x = 0; x < length; ++x) {
                auto store_copy = store + static_cast<std::size_t>(start + x) * secondary; // cast to avoid overflow.
                if constexpr(same_type) {
                    auto ptr = wrk->fetch(store_copy);
                    copy_n(ptr, secondary, store_copy);
                } else {
                    auto ptr = wrk->fetch(temp.data());
                    std::copy_n(ptr, secondary, store_copy);
                }
            }
        }, primary, options.num_threads);

    } else if (matrix.is_sparse()) {
        std::fill_n(store, primary * secondary, 0); // already cast to std::size_t to avoid overflow.

        // We iterate over the input matrix's preferred dimension but split
        // into threads along the non-preferred dimension. This aims to
        // reduce false sharing across threads during writes, as locations
        // for simultaneous writes in the transposed matrix will be
        // separated by around 'secondary * length' elements.
        parallelize([&](int, std::size_t start, std::size_t length) -> void {
            auto wrk = consecutive_extractor<true, InputValue_, InputIndex_>(matrix, pref_rows, 0, primary, start, length);
            std::vector<InputValue_> vtemp(length);
            std::vector<InputIndex_> itemp(length);

            // Note that we don't use the blocked transposition strategy
            // from the dense case, because the overhead of looping is
            // worse than the cache misses for sparse data.
            for (decltype(primary) x = 0; x < primary; ++x) {
                auto range = wrk->fetch(vtemp.data(), itemp.data());
                for (InputIndex_ i = 0; i < range.number; ++i) {
                    store[static_cast<std::size_t>(range.index[i]) * primary + x] = range.value[i]; // cast to std::size_t to avoid overflow.
                }
            }
        }, secondary, options.num_threads);

    } else {
        // Same logic as described for the sparse case; we iterate along the
        // preferred dimension but split into threads along the non-preferred
        // dimension to reduce false sharing.
        parallelize([&](int, std::size_t start, std::size_t length) -> void {
            auto wrk = consecutive_extractor<false, InputValue_, InputIndex_>(matrix, pref_rows, 0, primary, start, length);

            // Performing a blocked transposition to be more
            // cache-friendly. This involves collecting several
            // consecutive primary dimension elements so that we can
            // transpose by blocks along the secondary dimension.
            constexpr std::size_t block_size = 16;
            std::size_t alloc = std::min(primary, block_size);
            std::vector<InputValue_> bigbuffer(length * alloc); // already std::size_t's, to avoid overflow.
            std::vector<const InputValue_*> ptrs(alloc);
            std::vector<InputValue_*> buf_ptrs(alloc);
            for (decltype(alloc) i = 0; i < alloc; ++i) {
                buf_ptrs[i] = bigbuffer.data() + i * length; // already all std::size_t's, to avoid overflow.
            }

            std::size_t prim_i = 0;
            while (prim_i < primary) {
                std::size_t prim_to_process = std::min(static_cast<std::size_t>(primary - prim_i), block_size);
                for (decltype(prim_to_process) c = 0; c < prim_to_process; ++c) {
                    ptrs[c] = wrk->fetch(buf_ptrs[c]);
                }

                std::size_t sec_i = 0;
                while (sec_i < length) {
                    std::size_t sec_end = sec_i + std::min(static_cast<std::size_t>(length - sec_i), block_size);
                    for (decltype(prim_to_process) c = 0; c < prim_to_process; ++c) {
                        auto input = ptrs[c];
                        std::size_t offset = start * primary + (c + prim_i); // already all std::size_t's, to avoid overflow.
                        for (std::size_t r = sec_i; r < sec_end; ++r) {
                            store[r * primary + offset] = input[r]; // again, these are all std::size_t's already.
                        }
                    }

                    sec_i = sec_end;
                }
                prim_i += prim_to_process;
            }
        }, secondary, options.num_threads);
    }

    return;
}

/**
 * Convert a `tatami::Matrix` into a new dense matrix.
 *
 * @param matrix The input matrix.
 * @param row_major Whether the output matrix should store its contents in row-major order.
 * @param options Further options.
 *
 * @return A pointer to a new `DenseMatrix` holding the contents of `matrix`.
 */
template <
    typename Value_,
    typename Index_,
    typename StoredValue_ = Value_,
    typename InputValue_,
    typename InputIndex_
>
inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>& matrix, bool row_major, const ConvertToDenseOptions& options) {
    auto NR = matrix.nrow();
    auto NC = matrix.ncol();
    std::vector<StoredValue_> buffer(static_cast<std::size_t>(NR) * static_cast<std::size_t>(NC)); // cast to size_t to avoid overflow.
    convert_to_dense(matrix, row_major, buffer.data(), options);
    return std::shared_ptr<Matrix<Value_, Index_> >(new DenseMatrix<Value_, Index_, decltype(buffer)>(NR, NC, std::move(buffer), row_major));
}

// Backwards compatibility.
template <typename StoredValue_, typename InputValue_, typename InputIndex_>
void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, StoredValue_* store, int threads = 1) {
    convert_to_dense(
        *matrix,
        row_major,
        store,
        [&]{
            ConvertToDenseOptions options;
            options.num_threads = threads;
            return options;
        }()
    );
}

template <typename Value_ = double, typename Index_ = int, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, int threads = 1) {
    return convert_to_dense<Value_, Index_, StoredValue_>(
        *matrix,
        row_major,
        [&]{
            ConvertToDenseOptions options;
            options.num_threads = threads;
            return options;
        }()
    );
}

template<bool row_, typename StoredValue_, typename InputValue_, typename InputIndex_>
void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, StoredValue_* store, int threads = 1) {
    convert_to_dense(matrix, row_, store, threads);
}

template<bool row_, typename Value_, typename Index_, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, int threads = 1) {
    return convert_to_dense<Value_, Index_, StoredValue_>(matrix, row_, threads);
}

}

#endif
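
As a quick usage sketch (not part of this header), the example below builds a small column-major DenseMatrix, converts it with both overloads of convert_to_dense() shown above, and checks the expected row-major layout of the result. It assumes a tatami installation that provides the umbrella "tatami/tatami.hpp" include; the matrix contents and the two-thread setting are arbitrary choices for illustration.

// Hypothetical usage sketch; assumes the umbrella tatami/tatami.hpp header is available.
#include "tatami/tatami.hpp"

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

int main() {
    // A 3-by-4 column-major dense input wrapping a flat vector of values.
    int NR = 3, NC = 4;
    std::vector<double> values(static_cast<std::size_t>(NR) * NC);
    for (std::size_t i = 0; i < values.size(); ++i) {
        values[i] = static_cast<double>(i);
    }
    tatami::DenseMatrix<double, int, std::vector<double> > mat(NR, NC, values, /* row_major = */ false);

    tatami::ConvertToDenseOptions opt;
    opt.num_threads = 2; // arbitrary choice for the example.

    // Overload 1: fill a caller-supplied buffer in row-major order.
    std::vector<double> buffer(static_cast<std::size_t>(NR) * NC);
    tatami::convert_to_dense(mat, /* row_major = */ true, buffer.data(), opt);

    // With row-major output, element (r, c) lands at buffer[r * NC + c];
    // the column-major input stored it at values[c * NR + r].
    for (int r = 0; r < NR; ++r) {
        for (int c = 0; c < NC; ++c) {
            assert(buffer[static_cast<std::size_t>(r) * NC + c] == values[static_cast<std::size_t>(c) * NR + r]);
        }
    }

    // Overload 2: allocate and return a new row-major matrix.
    std::shared_ptr<tatami::Matrix<double, int> > converted =
        tatami::convert_to_dense<double, int>(mat, /* row_major = */ true, opt);
    assert(converted->nrow() == NR && converted->ncol() == NC);

    return 0;
}

The final checks mirror the indexing used inside convert_to_dense(): with row-major output, consecutive elements of a row are contiguous, so element (r, c) sits at offset r * NC + c of the buffer, whereas the column-major input stored it at c * NR + r.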
Dense matrix representation.
Definition DenseMatrix.hpp:182
Virtual class for a matrix.
Definition Matrix.hpp:59
virtual Index_ ncol() const =0
virtual Index_ nrow() const =0
virtual bool prefer_rows() const =0
virtual bool is_sparse() const =0
Templated construction of a new consecutive extractor.
Copy data from one buffer to another.
Flexible representations for matrix data.
Definition Extractor.hpp:15
void parallelize(Function_ fun, Index_ tasks, int threads)
Definition parallelize.hpp:42
Value_ * copy_n(const Value_ *input, Size_ n, Value_ *output)
Definition copy.hpp:25
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > &matrix, bool row_major, StoredValue_ *store, const ConvertToDenseOptions &options)
Definition convert_to_dense.hpp:45
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35
Parallelized iteration over a tatami::Matrix.
Options for convert_to_dense().
Definition convert_to_dense.hpp:26
int num_threads
Definition convert_to_dense.hpp:30
Transpose a dense array.