tatami
C++ API for different matrix representations

convert_to_dense.hpp
#ifndef TATAMI_CONVERT_TO_DENSE_H
#define TATAMI_CONVERT_TO_DENSE_H

#include "./DenseMatrix.hpp"
#include "../utils/consecutive_extractor.hpp"
#include "../utils/parallelize.hpp"
#include "../utils/copy.hpp"
#include "../utils/Index_to_container.hpp"

#include <memory>
#include <vector>
#include <cstddef>
#include <algorithm> // for std::copy_n, std::fill_n, std::min
#include <type_traits> // for std::is_same

#include "sanisizer/sanisizer.hpp"

namespace tatami {

/**
 * @brief Options for `convert_to_dense()`.
 */
struct ConvertToDenseOptions {
    /**
     * Number of threads to use, for parallelization via `parallelize()`.
     */
    int num_threads = 1;
};

template <typename StoredValue_, typename InputValue_, typename InputIndex_>
void convert_to_dense(const Matrix<InputValue_, InputIndex_>& matrix, bool row_major, StoredValue_* store, const ConvertToDenseOptions& options) {
    InputIndex_ NR = matrix.nrow();
    InputIndex_ NC = matrix.ncol();
    bool pref_rows = matrix.prefer_rows();
    auto primary = (pref_rows ? NR : NC);
    auto secondary = (pref_rows ? NC : NR);

    // We assume that 'store' was allocated correctly, in which case the product of 'primary' and 'secondary' is known to fit inside a std::size_t.
    // This saves us from various checks when computing related products (see all the product_unsafe() calls).

    if (row_major == pref_rows) {
        constexpr bool same_type = std::is_same<InputValue_, StoredValue_>::value;
        parallelize([&](int, InputIndex_ start, InputIndex_ length) -> void {
            auto wrk = consecutive_extractor<false, InputValue_, InputIndex_>(matrix, pref_rows, start, length);
            auto temp = [&]{
                if constexpr(same_type) {
                    return false; // placeholder; no temporary buffer is needed when the types are the same.
                } else {
                    return create_container_of_Index_size<std::vector<InputValue_> >(secondary);
                }
            }();

            for (decltype(length) x = 0; x < length; ++x) {
                auto store_copy = store + sanisizer::product_unsafe<std::size_t>(secondary, start + x);
                if constexpr(same_type) {
                    auto ptr = wrk->fetch(store_copy);
                    copy_n(ptr, secondary, store_copy);
                } else {
                    auto ptr = wrk->fetch(temp.data());
                    std::copy_n(ptr, secondary, store_copy);
                }
            }
        }, primary, options.num_threads);

    } else if (matrix.is_sparse()) {
        std::fill_n(store, sanisizer::product_unsafe<std::size_t>(primary, secondary), 0);

        // We iterate over the input matrix's preferred dimension but split
        // into threads along the non-preferred dimension. This aims to
        // reduce false sharing across threads during writes, as locations
        // for simultaneous writes in the transposed matrix will be
        // separated by around 'secondary * length' elements.
        parallelize([&](int, InputIndex_ start, InputIndex_ length) -> void {
            auto wrk = consecutive_extractor<true, InputValue_, InputIndex_>(matrix, pref_rows, 0, primary, start, length);
            auto vtemp = create_container_of_Index_size<std::vector<InputValue_> >(length);
            auto itemp = create_container_of_Index_size<std::vector<InputIndex_> >(length);

            // Note that we don't use the blocked transposition strategy
            // from the dense case, because the overhead of looping is
            // worse than the cache misses for sparse data.
            for (decltype(primary) x = 0; x < primary; ++x) {
                auto range = wrk->fetch(vtemp.data(), itemp.data());
                for (InputIndex_ i = 0; i < range.number; ++i) {
                    store[sanisizer::nd_offset<std::size_t>(x, primary, range.index[i])] = range.value[i];
                }
            }
        }, secondary, options.num_threads);

    } else {
        // Same logic as described for the sparse case; we iterate along the
        // preferred dimension but split into threads along the non-preferred
        // dimension to reduce false sharing.
        parallelize([&](int, InputIndex_ start, InputIndex_ length) -> void {
            auto wrk = consecutive_extractor<false, InputValue_, InputIndex_>(matrix, pref_rows, 0, primary, start, length);

            // Performing a blocked transposition to be more
            // cache-friendly. This involves collecting several
            // consecutive primary dimension elements so that we can
            // transpose by blocks along the secondary dimension.
            constexpr InputIndex_ block_size = 16;
            InputIndex_ alloc = std::min(primary, block_size);
            std::vector<InputValue_> bigbuffer(sanisizer::product_unsafe<typename std::vector<InputValue_>::size_type>(length, alloc));
            std::vector<const InputValue_*> ptrs(alloc); // no need for protection here, we know that alloc <= 16.
            std::vector<InputValue_*> buf_ptrs(alloc);
            for (decltype(alloc) i = 0; i < alloc; ++i) {
                buf_ptrs[i] = bigbuffer.data() + sanisizer::product_unsafe<std::size_t>(length, i);
            }

            InputIndex_ prim_i = 0;
            while (prim_i < primary) {
                InputIndex_ prim_to_process = std::min(static_cast<InputIndex_>(primary - prim_i), block_size);
                for (decltype(prim_to_process) c = 0; c < prim_to_process; ++c) {
                    ptrs[c] = wrk->fetch(buf_ptrs[c]);
                }

                InputIndex_ sec_i = 0;
                while (sec_i < length) {
                    InputIndex_ sec_end = sec_i + std::min(static_cast<InputIndex_>(length - sec_i), block_size);
                    for (decltype(prim_to_process) c = 0; c < prim_to_process; ++c) {
                        auto input = ptrs[c];
                        for (InputIndex_ r = sec_i; r < sec_end; ++r) {
                            store[sanisizer::nd_offset<std::size_t>(c + prim_i, primary, r + start)] = input[r];
                        }
                    }

                    sec_i = sec_end;
                }
                prim_i += prim_to_process;
            }
        }, secondary, options.num_threads);
    }

    return;
}

template <
    typename Value_,
    typename Index_,
    typename StoredValue_ = Value_,
    typename InputValue_,
    typename InputIndex_
>
inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>& matrix, bool row_major, const ConvertToDenseOptions& options) {
    auto NR = matrix.nrow();
    auto NC = matrix.ncol();
    auto buffer_size = sanisizer::product<std::size_t>(NR, NC); // Make sure the product fits in a size_t for array access via a pointer, just in case size_type != size_t.
    auto buffer = sanisizer::create<std::vector<StoredValue_> >(buffer_size);
    convert_to_dense(matrix, row_major, buffer.data(), options);
    return std::shared_ptr<Matrix<Value_, Index_> >(new DenseMatrix<Value_, Index_, decltype(buffer)>(NR, NC, std::move(buffer), row_major));
}

// Backwards compatibility.
template <typename StoredValue_, typename InputValue_, typename InputIndex_>
void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, StoredValue_* store, int threads = 1) {
    convert_to_dense(
        *matrix,
        row_major,
        store,
        [&]{
            ConvertToDenseOptions options;
            options.num_threads = threads;
            return options;
        }()
    );
}

template <typename Value_ = double, typename Index_ = int, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, int threads = 1) {
    return convert_to_dense<Value_, Index_, StoredValue_>(
        *matrix,
        row_major,
        [&]{
            ConvertToDenseOptions options;
            options.num_threads = threads;
            return options;
        }()
    );
}

template<bool row_, typename StoredValue_, typename InputValue_, typename InputIndex_>
void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, StoredValue_* store, int threads = 1) {
    convert_to_dense(matrix, row_, store, threads);
}

template<bool row_, typename Value_, typename Index_, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, int threads = 1) {
    return convert_to_dense<Value_, Index_, StoredValue_>(matrix, row_, threads);
}

}

#endif
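
For orientation, the listing below is a minimal usage sketch rather than part of the header above. The caller function, the example matrix "input" and the "tatami/tatami.hpp" include path are assumptions for illustration; only the convert_to_dense() overloads and ConvertToDenseOptions come from this file.

// Minimal usage sketch; names marked as assumptions are hypothetical.
#include "tatami/tatami.hpp" // assumed umbrella header exposing convert_to_dense()
#include <cstddef>
#include <memory>
#include <vector>

void example_usage(const tatami::Matrix<double, int>& input) { // hypothetical caller
    tatami::ConvertToDenseOptions opt;
    opt.num_threads = 4;

    // In-place conversion into a caller-allocated buffer, stored in row-major order.
    std::vector<double> buffer(static_cast<std::size_t>(input.nrow()) * static_cast<std::size_t>(input.ncol()));
    tatami::convert_to_dense(input, /* row_major = */ true, buffer.data(), opt);

    // Allocating conversion that wraps the result in a new DenseMatrix.
    std::shared_ptr<tatami::Matrix<double, int> > dense =
        tatami::convert_to_dense<double, int>(input, /* row_major = */ true, opt);
}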