tatami
C++ API for different matrix representations
convert_to_dense.hpp
1#ifndef TATAMI_CONVERT_TO_DENSE_H
2#define TATAMI_CONVERT_TO_DENSE_H
3
4#include "./DenseMatrix.hpp"
5
6#include "../utils/consecutive_extractor.hpp"
7#include "../utils/parallelize.hpp"
8#include "../utils/copy.hpp"
9#include "../utils/Index_to_container.hpp"
10
11#include <memory>
12#include <vector>
13#include <cstddef>
14
15#include "sanisizer/sanisizer.hpp"
16
23namespace tatami {
24
25/**
26 * @brief Options for `convert_to_dense()`.
27 */
28struct ConvertToDenseOptions {
29 /**
30 * Number of threads to use, e.g., in `parallelize()`.
31 */
32 int num_threads = 1;
33};
34
46template <typename StoredValue_, typename InputValue_, typename InputIndex_>
47void convert_to_dense(const Matrix<InputValue_, InputIndex_>& matrix, const bool row_major, StoredValue_* const store, const ConvertToDenseOptions& options) {
48 const InputIndex_ NR = matrix.nrow();
49 const InputIndex_ NC = matrix.ncol();
50 const bool pref_rows = matrix.prefer_rows();
51 const auto primary = (pref_rows ? NR : NC);
52 const auto secondary = (pref_rows ? NC : NR);
53
54 // We assume that 'store' was allocated correctly, in which case the product of 'primary' and 'secondary' is known to fit inside a std::size_t.
55 // This saves us from various checks when computing related products (see all the product_unsafe() calls).
56
57 if (row_major == pref_rows) {
58 constexpr bool same_type = std::is_same<InputValue_, StoredValue_>::value;
60
61 parallelize([&](const int, const InputIndex_ start, const InputIndex_ length) -> void {
62 auto wrk = consecutive_extractor<false, InputValue_, InputIndex_>(matrix, pref_rows, start, length);
63 auto temp = [&]{
64 if constexpr(same_type) {
65 return false;
66 } else {
67 return std::vector<InputValue_>(secondary);
68 }
69 }();
70
71 for (InputIndex_ x = 0; x < length; ++x) {
72 const auto store_copy = store + sanisizer::product_unsafe<std::size_t>(secondary, start + x);
73 if constexpr(same_type) {
74 auto ptr = wrk->fetch(store_copy);
75 copy_n(ptr, secondary, store_copy);
76 } else {
77 auto ptr = wrk->fetch(temp.data());
78 std::copy_n(ptr, secondary, store_copy);
79 }
80 }
81 }, primary, options.num_threads);
82
83 } else if (matrix.is_sparse()) {
84 std::fill_n(store, sanisizer::product_unsafe<std::size_t>(primary, secondary), 0);
85
86 // We iterate over the input matrix's preferred dimension but split
87 // into threads along the non-preferred dimension. This aims to
88 // reduce false sharing across threads during writes, as simultaneous
89 // writes from different threads to the transposed output will be
90 // separated by around 'primary * length' elements.
91 parallelize([&](const int, const InputIndex_ start, const InputIndex_ length) -> void {
92 auto wrk = consecutive_extractor<true, InputValue_, InputIndex_>(matrix, pref_rows, 0, primary, start, length);
93 auto vtemp = create_container_of_Index_size<std::vector<InputValue_> >(length); // values of the non-zero elements in this secondary block.
94 auto itemp = create_container_of_Index_size<std::vector<InputIndex_> >(length); // their secondary indices.
95
96 // Note that we don't use the blocked transposition strategy
97 // from the dense case, because the overhead of looping is
98 // worse than the cache misses for sparse data.
99 for (InputIndex_ x = 0; x < primary; ++x) {
100 const auto range = wrk->fetch(vtemp.data(), itemp.data());
101 for (InputIndex_ i = 0; i < range.number; ++i) {
102 store[sanisizer::nd_offset<std::size_t>(x, primary, range.index[i])] = range.value[i];
103 }
104 }
105 }, secondary, options.num_threads);
106
107 } else {
108 // Same logic as described for the sparse case; we iterate along the
109 // preferred dimension but split into threads along the non-preferred
110 // dimension to reduce false sharing.
111 parallelize([&](const int, const InputIndex_ start, const InputIndex_ length) -> void {
112 auto wrk = consecutive_extractor<false, InputValue_, InputIndex_>(matrix, pref_rows, 0, primary, start, length);
113
114 // Performing a blocked transposition to be more
115 // cache-friendly. This involves collecting several
116 // consecutive primary dimension elements so that we can
117 // transpose by blocks along the secondary dimension.
118 constexpr InputIndex_ block_size = 16;
119 const InputIndex_ alloc = std::min(primary, block_size);
120 std::vector<InputValue_> bigbuffer(sanisizer::product_unsafe<typename std::vector<InputValue_>::size_type>(length, alloc));
121 std::vector<const InputValue_*> ptrs(alloc); // no need for protection here, we know that alloc <= 16.
122 std::vector<InputValue_*> buf_ptrs(alloc);
123 for (InputIndex_ i = 0; i < alloc; ++i) {
124 buf_ptrs[i] = bigbuffer.data() + sanisizer::product_unsafe<std::size_t>(length, i);
125 }
126
127 InputIndex_ prim_i = 0;
128 while (prim_i < primary) {
129 const InputIndex_ prim_to_process = std::min(static_cast<InputIndex_>(primary - prim_i), block_size);
130 for (InputIndex_ c = 0; c < prim_to_process; ++c) {
131 ptrs[c] = wrk->fetch(buf_ptrs[c]);
132 }
133
134 InputIndex_ sec_i = 0;
135 while (sec_i < length) {
136 const InputIndex_ sec_end = sec_i + std::min(static_cast<InputIndex_>(length - sec_i), block_size);
137 for (InputIndex_ c = 0; c < prim_to_process; ++c) {
138 const auto input = ptrs[c];
139 for (InputIndex_ r = sec_i; r < sec_end; ++r) {
140 store[sanisizer::nd_offset<std::size_t>(c + prim_i, primary, r + start)] = input[r];
141 }
142 }
143
144 sec_i = sec_end;
145 }
146 prim_i += prim_to_process;
147 }
148 }, secondary, options.num_threads);
149 }
150
151 return;
152}
153
168template <
169 typename Value_,
170 typename Index_,
171 typename StoredValue_ = Value_,
172 typename InputValue_,
173 typename InputIndex_
174>
175std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>& matrix, const bool row_major, const ConvertToDenseOptions& options) {
176 const auto NR = matrix.nrow();
177 const auto NC = matrix.ncol();
178 const auto buffer_size = sanisizer::product<typename std::vector<StoredValue_>::size_type>(attest_for_Index(NR), attest_for_Index(NC));
179 std::vector<StoredValue_> buffer(buffer_size);
180 convert_to_dense(matrix, row_major, buffer.data(), options);
181
182 return std::shared_ptr<Matrix<Value_, Index_> >(
183 new DenseMatrix<Value_, Index_, I<decltype(buffer)> >(
184 sanisizer::cast<Index_>(attest_for_Index(NR)),
185 sanisizer::cast<Index_>(attest_for_Index(NC)),
186 std::move(buffer),
187 row_major
188 )
189 );
190}
191
195// Backwards compatibility.
196template <typename StoredValue_, typename InputValue_, typename InputIndex_>
197void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, StoredValue_* store, int threads = 1) {
198 convert_to_dense(
199 *matrix,
200 row_major,
201 store,
202 [&]{
203 ConvertToDenseOptions options;
204 options.num_threads = threads;
205 return options;
206 }()
207 );
208}
209
210template <typename Value_ = double, typename Index_ = int, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
211inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, int threads = 1) {
214 return convert_to_dense<Value_, Index_, StoredValue_>(
215 *matrix,
216 row_major,
217 [&]{
218 ConvertToDenseOptions options;
219 options.num_threads = threads;
220 return options;
221 }()
222 );
223}
224
225template<bool row_, typename StoredValue_, typename InputValue_, typename InputIndex_>
226void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, StoredValue_* store, int threads = 1) {
227 convert_to_dense(matrix, row_, store, threads);
228}
229
230template<bool row_, typename Value_, typename Index_, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
231inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, int threads = 1) {
232 return convert_to_dense<Value_, Index_, StoredValue_>(matrix, row_, threads);
233}
238}
239
240#endif
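Usage sketch (not part of the header above): the reference-based overloads are the current interface, with ConvertToDenseOptions controlling parallelization. The snippet below is illustrative only; the input matrix 'mat', the 'example()' wrapper and the "tatami/tatami.hpp" umbrella include are assumptions, while the convert_to_dense() signatures and the num_threads option are taken from the listing above.

#include "tatami/tatami.hpp" // assumed umbrella header that pulls in convert_to_dense.hpp
#include <cstddef>
#include <memory>
#include <vector>

void example(const std::shared_ptr<tatami::Matrix<double, int> >& mat) {
    tatami::ConvertToDenseOptions opt;
    opt.num_threads = 2; // run the conversion with two threads via parallelize().

    // Allocate and fill a new row-major dense matrix with double values and int indices.
    std::shared_ptr<tatami::Matrix<double, int> > dense =
        tatami::convert_to_dense<double, int>(*mat, /* row_major = */ true, opt);

    // Alternatively, fill caller-allocated storage in column-major order,
    // i.e., element (r, c) is written to buffer[c * NR + r].
    std::vector<double> buffer(static_cast<std::size_t>(mat->nrow()) * static_cast<std::size_t>(mat->ncol()));
    tatami::convert_to_dense(*mat, /* row_major = */ false, buffer.data(), opt);
}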
DenseMatrix.hpp
Dense matrix representation.
Index_to_container.hpp
Convert index type to container size.
tatami::DenseMatrix
Dense matrix representation.
Definition DenseMatrix.hpp:172
tatami::Matrix
Virtual class for a matrix.
Definition Matrix.hpp:59
tatami::Matrix::ncol
virtual Index_ ncol() const =0
tatami::Matrix::nrow
virtual Index_ nrow() const =0
tatami::Matrix::prefer_rows
virtual bool prefer_rows() const =0
tatami::Matrix::is_sparse
virtual bool is_sparse() const =0
consecutive_extractor.hpp
Templated construction of a new consecutive extractor.
copy.hpp
Copy data from one buffer to another.
tatami
Flexible representations for matrix data.
Definition Extractor.hpp:15
tatami::parallelize
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Definition parallelize.hpp:42
tatami::can_cast_Index_to_container_size
Index_ can_cast_Index_to_container_size(const Index_ x)
Definition Index_to_container.hpp:49
tatami::convert_to_dense
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > &matrix, const bool row_major, StoredValue_ *const store, const ConvertToDenseOptions &options)
Definition convert_to_dense.hpp:47
tatami::copy_n
Value_ * copy_n(const Value_ *const input, const Size_ n, Value_ *const output)
Definition copy.hpp:37
tatami::create_container_of_Index_size
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
Definition Index_to_container.hpp:82
tatami::consecutive_extractor
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35
parallelize.hpp
Parallelized iteration over a tatami::Matrix.
tatami::ConvertToDenseOptions
Options for convert_to_dense().
Definition convert_to_dense.hpp:28
tatami::ConvertToDenseOptions::num_threads
int num_threads
Definition convert_to_dense.hpp:32