46 InputIndex_ NR = matrix.
nrow();
47 InputIndex_ NC = matrix.
ncol();
49 std::size_t primary = (pref_rows ? NR : NC);
50 std::size_t secondary = (pref_rows ? NC : NR);
52 if (row_major == pref_rows) {
53 constexpr bool same_type = std::is_same<InputValue_, StoredValue_>::value;
54 parallelize([&](
int, std::size_t start, std::size_t length) ->
void {
55 std::vector<InputValue_> temp(same_type ? 0 : secondary);
58 for (
decltype(length) x = 0; x < length; ++x) {
59 auto store_copy = store +
static_cast<std::size_t
>(start + x) * secondary;
60 if constexpr(same_type) {
61 auto ptr = wrk->fetch(store_copy);
62 copy_n(ptr, secondary, store_copy);
64 auto ptr = wrk->fetch(temp.data());
65 std::copy_n(ptr, secondary, store_copy);
71 std::fill_n(store, primary * secondary, 0);
78 parallelize([&](
int, std::size_t start, std::size_t length) ->
void {
80 std::vector<InputValue_> vtemp(length);
81 std::vector<InputIndex_> itemp(length);
86 for (
decltype(primary) x = 0; x < primary; ++x) {
87 auto range = wrk->fetch(vtemp.data(), itemp.data());
88 for (InputIndex_ i = 0; i < range.number; ++i) {
89 store[
static_cast<std::size_t
>(range.index[i]) * primary + x] = range.value[i];
98 parallelize([&](
int, std::size_t start, std::size_t length) ->
void {
105 constexpr std::size_t block_size = 16;
106 std::size_t alloc = std::min(primary, block_size);
107 std::vector<InputValue_> bigbuffer(length * alloc);
108 std::vector<const InputValue_*> ptrs(alloc);
109 std::vector<InputValue_*> buf_ptrs(alloc);
110 for (
decltype(alloc) i = 0; i < alloc; ++i) {
111 buf_ptrs[i] = bigbuffer.data() + i * length;
114 std::size_t prim_i = 0;
115 while (prim_i < primary) {
116 std::size_t prim_to_process = std::min(
static_cast<std::size_t
>(primary - prim_i), block_size);
117 for (
decltype(prim_to_process) c = 0; c < prim_to_process; ++c) {
118 ptrs[c] = wrk->fetch(buf_ptrs[c]);
121 std::size_t sec_i = 0;
122 while (sec_i < length) {
123 std::size_t sec_end = sec_i + std::min(
static_cast<std::size_t
>(length - sec_i), block_size);
124 for (
decltype(prim_to_process) c = 0; c < prim_to_process; ++c) {
125 auto input = ptrs[c];
126 std::size_t offset = start * primary + (c + prim_i);
127 for (std::size_t r = sec_i; r < sec_end; ++r) {
128 store[r * primary + offset] = input[r];
134 prim_i += prim_to_process;
void convert_to_dense(const Matrix< InputValue_, InputIndex_ > &matrix, bool row_major, StoredValue_ *store, const ConvertToDenseOptions &options)
Definition convert_to_dense.hpp:45
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35