1#ifndef TATAMI_CONVERT_TO_DENSE_H
2#define TATAMI_CONVERT_TO_DENSE_H
16#include "sanisizer/sanisizer.hpp"
39template <
typename StoredValue_,
typename InputValue_,
typename InputIndex_>
41 const InputIndex_ NR = matrix.
nrow();
42 const InputIndex_ NC = matrix.
ncol();
43 const auto primary = (row ? NR : NC);
44 const auto secondary = (row ? NC : NR);
46 constexpr bool same_type = std::is_same<InputValue_, StoredValue_>::value;
49 parallelize([&](
const int,
const InputIndex_ start,
const InputIndex_ length) ->
void {
52 if constexpr(same_type) {
55 return std::vector<InputValue_>(secondary);
59 for (InputIndex_ x = 0; x < length; ++x) {
60 const auto store_copy = store + sanisizer::product_unsafe<std::size_t>(secondary, start + x);
61 if constexpr(same_type) {
62 auto ptr = wrk->fetch(store_copy);
63 copy_n(ptr, secondary, store_copy);
65 auto ptr = wrk->fetch(temp.data());
66 std::copy_n(ptr, secondary, store_copy);
72template <
typename StoredValue_,
typename InputValue_,
typename InputIndex_>
73void convert_to_dense_running_from_sparse(
const Matrix<InputValue_, InputIndex_>& matrix,
const bool row, StoredValue_*
const store,
const ConvertToDenseOptions& options) {
74 const InputIndex_ NR = matrix.nrow();
75 const InputIndex_ NC = matrix.ncol();
76 const auto primary = (row ? NR : NC);
77 const auto secondary = (row ? NC : NR);
86 struct ThreadSpecificHolder {
87 ThreadSpecificHolder(
const InputIndex_ start,
const InputIndex_ length) :
93 std::vector<std::vector<InputValue_> > values;
94 std::vector<std::vector<InputIndex_> > indices;
97 const bool do_parallel = options.num_threads > 1;
98 std::optional<std::vector<std::optional<ThreadSpecificHolder> > > all_partial_contents;
101 all_partial_contents.emplace(sanisizer::cast<I<
decltype(all_partial_contents->size())> >(options.num_threads - 1));
106 std::fill_n(store, sanisizer::product_unsafe<std::size_t>(primary, secondary), 0);
108 const auto num_used =
parallelize([&](
const int thread,
const InputIndex_ start,
const InputIndex_ length) ->
void {
113 if (!do_parallel || thread == 0) {
114 for (InputIndex_ x = 0; x < length; ++x) {
115 const auto range = wrk->fetch(vtemp.data(), itemp.data());
116 for (InputIndex_ i = 0; i < range.number; ++i) {
117 store[sanisizer::nd_offset<std::size_t>(start + x, secondary, range.index[i])] = range.value[i];
122 ThreadSpecificHolder tmp(start, length);
123 for (InputIndex_ x = 0; x < length; ++x) {
124 const auto range = wrk->fetch(vtemp.data(), itemp.data());
125 tmp.values[x] = std::vector<InputValue_>(range.value, range.value + range.number);
126 tmp.indices[x] = std::vector<InputIndex_>(range.index, range.index + range.number);
128 (*all_partial_contents)[thread - 1] = std::move(tmp);
130 }, secondary, options.num_threads);
134 for (
int u = 1; u < num_used; ++u) {
135 const auto start = (*all_partial_contents)[u - 1]->start;
136 const auto& tmp_values = (*all_partial_contents)[u - 1]->values;
137 const auto& tmp_indices = (*all_partial_contents)[u - 1]->indices;
138 const auto length = tmp_indices.size();
139 for (I<
decltype(length)> x = 0; x < length ; ++x) {
140 const auto& cur_values = tmp_values[x];
141 const auto& cur_indices = tmp_indices[x];
142 const auto cur_count = cur_indices.size();
143 for (I<
decltype(cur_count)> i = 0; i < cur_count; ++i) {
144 store[sanisizer::nd_offset<std::size_t>(start + x, secondary, cur_indices[i])] = cur_values[i];
151template <
typename StoredValue_,
typename InputValue_,
typename InputIndex_>
152void convert_to_dense_running_from_dense(
const Matrix<InputValue_, InputIndex_>& matrix,
const bool row, StoredValue_*
const store,
const ConvertToDenseOptions& options) {
153 const InputIndex_ NR = matrix.nrow();
154 const InputIndex_ NC = matrix.ncol();
155 const auto primary = (row ? NR : NC);
156 const auto secondary = (row ? NC : NR);
165 const bool do_parallel = options.num_threads > 1;
166 struct ThreadSpecificHolder {
167 ThreadSpecificHolder(
const InputIndex_ start,
const InputIndex_ length,
const InputIndex_ primary) :
170 values(sanisizer::product<I<decltype(values.size())> >(length, primary))
172 InputIndex_ start, length;
173 std::vector<InputValue_> values;
175 std::optional<std::vector<std::optional<ThreadSpecificHolder> > > all_partial_contents;
177 all_partial_contents.emplace(sanisizer::cast<I<
decltype(all_partial_contents->size())> >(options.num_threads - 1));
182 std::fill_n(store, sanisizer::product_unsafe<std::size_t>(primary, secondary), 0);
184 const auto num_used =
parallelize([&](
const int thread,
const InputIndex_ start,
const InputIndex_ length) ->
void {
187 if (!do_parallel || thread == 0) {
190 constexpr InputIndex_ block_size = 16;
191 const InputIndex_ alloc = std::min(length, block_size);
192 std::vector<InputValue_> bigbuffer(sanisizer::product_unsafe<
typename std::vector<InputValue_>::size_type>(primary, alloc));
193 std::vector<const InputValue_*> ptrs(alloc);
195 InputIndex_ sec_i = 0;
196 while (sec_i < length) {
197 const InputIndex_ sec_to_process = std::min(
static_cast<InputIndex_
>(length - sec_i), block_size);
198 for (InputIndex_ x = 0; x < sec_to_process; ++x) {
199 ptrs[x] = wrk->fetch(bigbuffer.data() + sanisizer::product_unsafe<std::size_t>(primary, x));
202 InputIndex_ prim_i = 0;
203 while (prim_i < primary) {
204 const InputIndex_ prim_end = prim_i + std::min(
static_cast<InputIndex_
>(primary - prim_i), block_size);
205 for (InputIndex_ x = 0; x < sec_to_process; ++x) {
206 const auto input = ptrs[x];
207 for (InputIndex_ p = prim_i; p < prim_end; ++p) {
208 store[sanisizer::nd_offset<std::size_t>(start + sec_i + x, secondary, p)] = input[p];
213 sec_i += sec_to_process;
217 ThreadSpecificHolder tmp(start, length, primary);
218 for (InputIndex_ x = 0; x < length; ++x) {
219 const auto buffer = tmp.values.data() + sanisizer::product_unsafe<std::size_t>(x, primary);
220 const auto ptr = wrk->fetch(buffer);
221 copy_n(ptr, primary, buffer);
223 (*all_partial_contents)[thread - 1] = std::move(tmp);
225 }, secondary, options.num_threads);
229 for (
int u = 1; u < num_used; ++u) {
230 const auto tmp_start = (*all_partial_contents)[u - 1]->start;
231 const auto tmp_length = (*all_partial_contents)[u - 1]->length;
232 const auto& tmp_values = (*all_partial_contents)[u - 1]->values;
233 transpose(tmp_values.data(), tmp_length, primary, primary, store + tmp_start, secondary);
252template <
typename StoredValue_,
typename InputValue_,
typename InputIndex_>
255 convert_to_dense_direct(matrix, row_major, store, options);
257 convert_to_dense_running_from_sparse(matrix, row_major, store, options);
259 convert_to_dense_running_from_dense(matrix, row_major, store, options);
280 typename StoredValue_ = Value_,
281 typename InputValue_,
285 const auto NR = matrix.
nrow();
286 const auto NC = matrix.
ncol();
287 const auto buffer_size = sanisizer::product<typename std::vector<StoredValue_>::size_type>(attest_for_Index(NR), attest_for_Index(NC));
288 std::vector<StoredValue_> buffer(buffer_size);
291 return std::shared_ptr<Matrix<Value_, Index_> >(
292 new DenseMatrix<Value_, Index_, I<
decltype(buffer)> >(
293 sanisizer::cast<Index_>(attest_for_Index(NR)),
294 sanisizer::cast<Index_>(attest_for_Index(NC)),
305template <
typename StoredValue_,
typename InputValue_,
typename InputIndex_>
306void convert_to_dense(
const Matrix<InputValue_, InputIndex_>* matrix,
bool row_major, StoredValue_* store,
int threads = 1) {
312 ConvertToDenseOptions options;
313 options.num_threads = threads;
319template <
typename Value_ =
double,
typename Index_ =
int,
typename StoredValue_ = Value_,
typename InputValue_,
typename InputIndex_>
320inline std::shared_ptr<Matrix<Value_, Index_> >
convert_to_dense(
const Matrix<InputValue_, InputIndex_>* matrix,
bool row_major,
int threads = 1) {
321 ConvertToDenseOptions options;
322 options.num_threads = threads;
327 ConvertToDenseOptions options;
328 options.num_threads = threads;
334template<
bool row_,
typename StoredValue_,
typename InputValue_,
typename InputIndex_>
335void convert_to_dense(
const Matrix<InputValue_, InputIndex_>* matrix, StoredValue_* store,
int threads = 1) {
339template<
bool row_,
typename Value_,
typename Index_,
typename StoredValue_ = Value_,
typename InputValue_,
typename InputIndex_>
340inline std::shared_ptr<Matrix<Value_, Index_> >
convert_to_dense(
const Matrix<InputValue_, InputIndex_>* matrix,
int threads = 1) {
Dense matrix representation.
Convert index type to container size.
Dense matrix representation.
Definition DenseMatrix.hpp:180
Virtual class for a matrix.
Definition Matrix.hpp:59
virtual Index_ ncol() const =0
virtual Index_ nrow() const =0
virtual bool prefer_rows() const =0
virtual bool is_sparse() const =0
Copy data from one buffer to another.
Flexible representations for matrix data.
Definition Extractor.hpp:15
Index_ can_cast_Index_to_container_size(const Index_ x)
Definition Index_to_container.hpp:49
void transpose(const Input_ *const input, const std::size_t nrow, const std::size_t ncol, const std::size_t input_stride, Output_ *const output, const std::size_t output_stride)
Definition transpose.hpp:38
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Definition parallelize.hpp:58
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > &matrix, const bool row_major, StoredValue_ *const store, const ConvertToDenseOptions &options)
Definition convert_to_dense.hpp:253
I< decltype(std::declval< Container_ >().size())> cast_Index_to_container_size(const Index_ x)
Definition Index_to_container.hpp:65
Value_ * copy_n(const Value_ *const input, const Size_ n, Value_ *const output)
Definition copy.hpp:37
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
Definition Index_to_container.hpp:82
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35
Parallelized iteration over a tatami::Matrix.
Options for convert_to_dense().
Definition convert_to_dense.hpp:29
int num_threads
Definition convert_to_dense.hpp:33