tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
convert_to_dense.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CONVERT_TO_DENSE_H
2#define TATAMI_CONVERT_TO_DENSE_H
3
4#include "./DenseMatrix.hpp"
5#include "./transpose.hpp"
6
7#include "../utils/consecutive_extractor.hpp"
8#include "../utils/parallelize.hpp"
9#include "../utils/copy.hpp"
10
#include <algorithm>
#include <cstddef>
#include <deque>
#include <memory>
#include <type_traits>
#include <vector>
14
21namespace tatami {
22
34template <typename StoredValue_, typename InputValue_, typename InputIndex_>
35void convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, StoredValue_* store, int threads = 1) {
36 InputIndex_ NR = matrix->nrow();
37 InputIndex_ NC = matrix->ncol();
38 bool pref_rows = matrix->prefer_rows();
39 size_t primary = (pref_rows ? NR : NC);
40 size_t secondary = (pref_rows ? NC : NR);
41
42 if (row_major == pref_rows) {
43 constexpr bool same_type = std::is_same<InputValue_, StoredValue_>::value;
44 parallelize([&](int, InputIndex_ start, InputIndex_ length) -> void {
45 std::vector<InputValue_> temp(same_type ? 0 : secondary);
46 auto store_copy = store + static_cast<size_t>(start) * secondary; // cast to size_t to avoid overflow.
48
49 for (InputIndex_ x = 0; x < length; ++x) {
50 if constexpr(same_type) {
51 auto ptr = wrk->fetch(store_copy);
52 copy_n(ptr, secondary, store_copy);
53 } else {
54 auto ptr = wrk->fetch(temp.data());
55 std::copy_n(ptr, secondary, store_copy);
56 }
57 store_copy += secondary;
58 }
59 }, primary, threads);
60
61 } else if (matrix->is_sparse()) {
62 std::fill_n(store, primary * secondary, 0); // already cast to size_t to avoid overflow.
63
64 // We iterate over the input matrix's preferred dimension but split
65 // into threads along the non-preferred dimension. This aims to
66 // reduce false sharing across threads during writes, as locations
67 // for simultaneous writes in the transposed matrix will be
68 // separated by around 'secondary * length' elements.
70 auto store_copy = store;
71
73 std::vector<InputValue_> vtemp(length);
74 std::vector<InputIndex_> itemp(length);
75
76 // Note that we don't use the blocked transposition strategy
77 // from the dense case, because the overhead of looping is
78 // worse than the cache misses for sparse data.
79 for (size_t x = 0; x < primary; ++x) {
80 auto range = wrk->fetch(vtemp.data(), itemp.data());
81 for (InputIndex_ i = 0; i < range.number; ++i) {
82 store_copy[static_cast<size_t>(range.index[i]) * primary] = range.value[i]; // again, casting here.
83 }
84 ++store_copy;
85 }
86 }, secondary, threads);
87
88 } else {
89 // Same logic as described for the sparse case; we iterate along the
90 // preferred dimension but split into threads along the non-preferred
91 // dimension to reduce false sharing.
93 auto store_copy = store + static_cast<size_t>(start) * primary; // cast to size_t to avoid overflow.
94
96 const size_t length_as_size_t = length;
97
98 // Performing a blocked transposition to be more
99 // cache-friendly. This involves collecting several
100 // consecutive primary dimension elements so that we can
101 // transpose by blocks along the secondary dimension.
102 constexpr size_t block_size = 16;
103 const size_t alloc = std::min(primary, block_size);
104 std::vector<InputValue_> bigbuffer(length_as_size_t * alloc);
105 std::vector<const InputValue_*> ptrs(alloc);
106 std::vector<InputValue_*> buf_ptrs;
107 buf_ptrs.reserve(alloc);
108 auto first = bigbuffer.data();
109 for (size_t i = 0; i < alloc; ++i, first += length_as_size_t) {
110 buf_ptrs.push_back(first);
111 }
112
113 size_t prim_i = 0;
114 while (prim_i < primary) {
115 size_t prim_to_process = std::min(primary - prim_i, block_size);
116 for (size_t c = 0; c < prim_to_process; ++c) {
117 ptrs[c] = wrk->fetch(buf_ptrs[c]);
118 }
119
120 size_t sec_i = 0;
121 while (sec_i < length_as_size_t) {
122 size_t sec_to_process = std::min(block_size, length_as_size_t - sec_i);
123 auto output = store_copy + sec_i * primary;
124 for (size_t c = 0; c < prim_to_process; ++c, ++output) {
125 auto input = ptrs[c] + sec_i;
126 auto output2 = output;
127 for (size_t r = 0; r < sec_to_process; ++r, ++input, output2 += primary) {
128 *output2 = *input;
129 }
130 }
132 }
133
136 }
137 }, secondary, threads);
138 }
139
140 return;
141}
142
157template <
158 typename Value_ = double,
159 typename Index_ = int,
160 typename StoredValue_ = Value_,
161 typename InputValue_,
162 typename InputIndex_
163>
164inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, bool row_major, int threads = 1) {
165 auto NR = matrix->nrow();
166 auto NC = matrix->ncol();
167 std::vector<StoredValue_> buffer(static_cast<size_t>(NR) * static_cast<size_t>(NC));
169 return std::shared_ptr<Matrix<Value_, Index_> >(new DenseMatrix<Value_, Index_, decltype(buffer)>(NR, NC, std::move(buffer), row_major));
170}
171
175// Backwards compatbility.
176template<bool row_, typename StoredValue_, typename InputValue_, typename InputIndex_>
179}
180
181template<bool row_, typename Value_, typename Index_, typename StoredValue_ = Value_, typename InputValue_, typename InputIndex_>
182inline std::shared_ptr<Matrix<Value_, Index_> > convert_to_dense(const Matrix<InputValue_, InputIndex_>* matrix, int threads = 1) {
184}
189}
190
191#endif
Dense matrix representation.
Virtual class for a matrix.
Definition Matrix.hpp:59
virtual Index_ ncol() const =0
virtual Index_ nrow() const =0
virtual bool prefer_rows() const =0
Flexible representations for matrix data.
Definition Extractor.hpp:15
void parallelize(Function_ fun, Index_ tasks, int threads)
Definition parallelize.hpp:42
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > *matrix, bool row_major, StoredValue_ *store, int threads=1)
Definition convert_to_dense.hpp:35
Value_ * copy_n(const Value_ *input, Size_ n, Value_ *output)
Definition copy.hpp:25
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35
Transpose a dense array.