// NOTE(review): fragmentary extract — original line numbers are fused into the
// start of many lines and several ranges (e.g. 26, 30-44, 46-49) are missing.
// Comments below describe only what the visible code shows; missing parts are
// hedged. Do not treat them as a complete specification.
//
// convert_by_row<ColIndex_, ValueOut_, IndexOut_>: appears to build a layered
// sparse representation by splitting the NC columns into chunks of chunk_size
// and scanning the input matrix row by row (declaration on the missing
// original line 26 — TODO confirm signature against the full file).
25template<
typename ColIndex_,
typename ValueOut_ =
double,
typename IndexOut_ =
int,
typename ValueIn_,
typename IndexIn_>
// Matrix extents, taken from the (out-of-view) 'mat' argument.
27 const auto NR = mat.
nrow(), NC = mat.
ncol();
// Number of column chunks, rounding up for a partial trailing chunk; clamped
// to at least 1 via sanisizer::max so downstream vectors are never empty.
28 const IndexIn_ leftovers = NC % chunk_size;
29 const IndexIn_ nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
// Initialization of the per-chunk accumulators (loop bodies on missing lines).
45 for (
auto& x : max_per_chunk) {
50 for (
auto& x : num_per_chunk) {
// --- First pass, sparse input: for each row in this worker's range, record
// the largest category and count of entries per (chunk, row).
64 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
65 const auto range = ext->fetch(r, dbuffer.data(), ibuffer.data());
66 for (IndexIn_ i = 0; i < range.number; ++i) {
// Column index maps to its chunk; categorize() presumably buckets the value
// (e.g. by required integer width) — TODO confirm, definition not visible.
68 const auto chunk = range.index[i] / chunk_size;
69 const auto cat = categorize(range.value[i]);
70 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cat);
71 ++num_per_chunk[chunk][r];
// --- First pass, dense input: same accumulation, scanning all NC columns.
// NOTE(review): no zero-value guard is visible here (missing line 85?) —
// verify whether zeros are intentionally counted in num_per_chunk.
82 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
83 auto ptr = ext->fetch(r, dbuffer.data());
84 for (IndexIn_ c = 0; c < NC; ++c) {
86 const auto chunk = c / chunk_size;
87 const auto cat = categorize(ptr[c]);
88 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cat);
89 ++num_per_chunk[chunk][r];
// --- Second pass, sparse input: resolve each chunk's write cursor for this
// row, then route every non-zero into the 8/16/32-bit store selected by the
// chunk's assigned category.
120 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
121 for (I<
decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
122 output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
125 auto range = ext->fetch(r, dbuffer.data(), ibuffer.data());
126 for (IndexIn_ i = 0; i < range.number; ++i) {
127 if (range.value[i]) {
// Column index split into (chunk, within-chunk offset).
128 const IndexIn_ chunk = range.index[i] / chunk_size;
129 const IndexIn_ col = range.index[i] % chunk_size;
130 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, range.value[i], output_positions[chunk]++);
// --- Second pass, dense input: as above over every column.
// NOTE(review): the `if (ptr[c])` guard would sit on the missing line 145 —
// confirm zeros are skipped here, mirroring the sparse branch.
138 for (IndexIn_ r = start, end = start + length; r < end; ++r) {
139 for (I<
decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
140 output_positions[chunk] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
143 auto ptr = ext->fetch(r, dbuffer.data());
144 for (IndexIn_ c = 0; c < NC; ++c) {
146 const IndexIn_ chunk = c / chunk_size;
147 const IndexIn_ col = c % chunk_size;
148 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, ptr[c], output_positions[chunk]++);
// Assemble the per-chunk stores into the final layered matrix (arguments on
// missing lines).
157 return consolidate_matrices<ValueOut_, IndexOut_>(
// NOTE(review): fragment — original lines 172 and 174-178 are missing.
// Allocates one inner vector per chunk; from the signature the result is an
// nchunks-long vector of per-row buffers, each presumably sized to NR inside
// the visible loop (declaration of 'output' is on the missing line 172 —
// TODO confirm).
170template<
typename Value_,
typename IndexIn_>
171std::vector<std::vector<Value_> > convert_by_column_allocate_store_per_chunk(
const IndexIn_ nchunks,
const IndexIn_ NR) {
173 for (
auto& x : output) {
// NOTE(review): fragment — the first parameter (missing original line 181,
// presumably the thread id, given how callers pass 't' first) and lines
// 185-187/189+ are missing. From the visible call at line 188 and the caller
// pattern, this looks like: thread 0 reuses 'base' directly, while other
// threads lazily allocate a private store into 'tmp' and get that back —
// TODO confirm against the full file.
179template<
typename Value_,
typename IndexIn_>
180std::vector<std::vector<Value_> >& convert_by_column_get_store_per_chunk(
182 std::vector<std::vector<Value_> >& base,
183 std::optional<std::vector<std::vector<Value_> > >& tmp,
184 const IndexIn_ nchunks,
188 tmp = convert_by_column_allocate_store_per_chunk<Value_>(nchunks, NR);
// NOTE(review): fragment — original lines 197 (presumably the 'thread'
// parameter) and 200-201 are missing. Moves a worker thread's private buffer
// into the collection; the 'thread - 1' slot suggests thread 0 writes to the
// shared base directly and only threads >= 1 are collected here — TODO
// confirm the guard on the missing lines.
195template<
typename Value_>
196void convert_by_column_save_store_per_chunk(
198 std::optional<std::vector<std::vector<Value_> > >& tmp,
199 std::vector<std::vector<std::vector<Value_> > >& collected
202 collected[thread - 1] = std::move(*tmp);
// NOTE(review): fragmentary extract with many missing interior lines
// (211-224, 229-231, 233-240, 245, 251, 257-261, 264-266, …). Comments
// describe only what is visible; hedged where the surrounding control flow
// (e.g. the sparse-vs-dense branch selecting between the two parallelize
// calls) is not in view.
//
// convert_by_column: column-major counterpart of convert_by_row — scans the
// input one column at a time, accumulating per-(chunk, row) maxima and
// counts with per-thread scratch buffers that are merged afterwards.
206template<
typename ColIndex_,
typename ValueOut_ =
double,
typename IndexOut_ =
int,
typename ValueIn_,
typename IndexIn_>
207std::shared_ptr<tatami::Matrix<ValueOut_, IndexOut_> > convert_by_column(
const tatami::Matrix<ValueIn_, IndexIn_>& mat,
const IndexIn_ chunk_size,
const int nthreads) {
208 const auto NR = mat.
nrow(), NC = mat.
ncol();
// Chunk count, rounded up and clamped to >= 1 (same scheme as convert_by_row).
209 const IndexIn_ leftovers = NC % chunk_size;
210 const IndexIn_ nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
// Shared accumulators for thread 0, plus one private copy per extra thread
// (hence 'nthreads - 1') to avoid write contention across workers.
225 auto max_per_chunk = convert_by_column_allocate_store_per_chunk<Category>(nchunks, NR);
226 auto max_per_chunk_threaded = sanisizer::create<std::vector<std::vector<std::vector<Category> > > >(nthreads - 1);
227 auto num_per_chunk = convert_by_column_allocate_store_per_chunk<IndexIn_>(nchunks, NR);
228 auto num_per_chunk_threaded = sanisizer::create<std::vector<std::vector<std::vector<IndexIn_> > > >(nthreads - 1);
// First pass, sparse input: columns are parallelized; each worker fetches
// whole columns and folds values into its own (or the shared) accumulators.
// 'num_used' records how many workers actually ran — TODO confirm, the
// assignment target's declaration is on a missing line.
232 num_used =
tatami::parallelize([&](
const int t,
const IndexIn_ start,
const IndexIn_ length) ->
void {
// Thread 0 writes into the shared buffers; others get lazily-allocated
// private ones via the std::optional scratch.
241 std::optional<std::vector<std::vector<Category> > > max_tmp;
242 auto& cur_max_per_chunk = convert_by_column_get_store_per_chunk(t, max_per_chunk, max_tmp, nchunks, NR);
243 std::optional<std::vector<std::vector<IndexIn_> > > num_tmp;
244 auto& cur_num_per_chunk = convert_by_column_get_store_per_chunk(t, num_per_chunk, num_tmp, nchunks, NR);
246 for (IndexIn_ c = start, end = start + length; c < end; ++c) {
247 const auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
// All entries of one column belong to a single chunk, so the chunk's
// buffers can be resolved once per column.
248 const auto chunk = c / chunk_size;
249 auto& max_vec = cur_max_per_chunk[chunk];
250 auto& num_vec = cur_num_per_chunk[chunk];
252 for (IndexIn_ i = 0; i < range.number; ++i) {
253 if (range.value[i]) {
254 const auto cat = categorize(range.value[i]);
255 const auto r = range.index[i];
// Count increment presumably on the missing line 257 — TODO confirm.
256 max_vec[r] = std::max(max_vec[r], cat);
// Hand each worker's private buffers back for the merge step below.
262 convert_by_column_save_store_per_chunk(t, max_tmp, max_per_chunk_threaded);
263 convert_by_column_save_store_per_chunk(t, num_tmp, num_per_chunk_threaded);
// First pass, dense input: same structure, iterating every row of each
// fetched column. NOTE(review): no zero-value guard is visible (missing
// line 283?) — verify zeros are skipped as in the sparse branch.
267 num_used =
tatami::parallelize([&](
const int t,
const IndexIn_ start,
const IndexIn_ length) ->
void {
271 std::optional<std::vector<std::vector<Category> > > max_tmp;
272 auto& cur_max_per_chunk = convert_by_column_get_store_per_chunk(t, max_per_chunk, max_tmp, nchunks, NR);
273 std::optional<std::vector<std::vector<IndexIn_> > > num_tmp;
274 auto& cur_num_per_chunk = convert_by_column_get_store_per_chunk(t, num_per_chunk, num_tmp, nchunks, NR);
276 for (IndexIn_ c = start, end = start + length; c < end; ++c) {
277 const auto ptr = ext->fetch(c, dbuffer.data());
278 const auto chunk = c / chunk_size;
279 auto& max_vec = cur_max_per_chunk[chunk];
280 auto& num_vec = cur_num_per_chunk[chunk];
282 for (IndexIn_ r = 0; r < NR; ++r) {
284 auto cat = categorize(ptr[r]);
285 max_vec[r] = std::max(max_vec[r], cat);
291 convert_by_column_save_store_per_chunk(t, max_tmp, max_per_chunk_threaded);
292 convert_by_column_save_store_per_chunk(t, num_tmp, num_per_chunk_threaded);
// Serial merge: fold every extra worker's private accumulators into the
// shared ones (max for categories, sum for counts).
296 for (
int t = 1; t < num_used; ++t) {
297 const auto& cur_max_per_chunk = max_per_chunk_threaded[t - 1];
298 const auto& cur_num_per_chunk = num_per_chunk_threaded[t - 1];
299 for (I<
decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
300 for (IndexIn_ r = 0; r < NR; ++r) {
301 max_per_chunk[chunk][r] = std::max(max_per_chunk[chunk][r], cur_max_per_chunk[chunk][r]);
302 num_per_chunk[chunk][r] += cur_num_per_chunk[chunk][r];
// Second pass: precompute per-(chunk, row-within-block) write cursors, then
// fill the 8/16/32-bit stores. Rows are offset by 'start', i.e. each worker
// handles the row block [start, start + length).
325 for (I<
decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
327 for (IndexIn_ r = 0; r < length; ++r) {
328 output_positions[chunk][r] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r + start);
// Sparse fill: range.index[i] is an absolute row, rebased by 'start' when
// indexing the per-block cursor array.
338 for (IndexIn_ c = 0; c < NC; ++c) {
339 const auto range = ext->fetch(c, dbuffer.data(), ibuffer.data());
340 const auto chunk = c / chunk_size;
341 const IndexIn_ col = c % chunk_size;
342 auto& outpos = output_positions[chunk];
344 for (IndexIn_ i = 0; i < range.number; ++i) {
345 if (range.value[i]) {
346 const auto r = range.index[i];
347 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, range.value[i], outpos[r - start]++);
// Dense fill. NOTE(review): this loop runs r over [0, NR) yet indexes
// outpos[r - start], which underflows when start > 0 unless the missing
// lines (360/362) restrict r to [start, start + length) or start is always
// 0 here — verify against the full file before assuming a bug.
355 for (IndexIn_ c = 0; c < NC; ++c) {
356 const auto ptr = ext->fetch(c, dbuffer.data());
357 const auto chunk = c / chunk_size;
358 const IndexIn_ col = c % chunk_size;
359 auto& outpos = output_positions[chunk];
361 for (IndexIn_ r = 0; r < NR; ++r) {
363 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, col, ptr[r], outpos[r - start]++);
// Final assembly of the per-chunk stores (arguments on missing lines).
372 return consolidate_matrices<ValueOut_, IndexOut_>(
// NOTE(review): fragment — the function signature (missing original line 435)
// and the branch condition (missing line 437, presumably something like
// mat.prefer_rows()) are not visible. From the two returns, this is the
// dispatcher that validates the chunk size against the ColumnIndex_ capacity
// and then delegates to the row-major or column-major conversion path.
434template<
typename ValueOut_ =
double,
typename IndexOut_ =
int,
typename ColumnIndex_ = std::u
int16_t,
typename ValueIn_,
typename IndexIn_>
436 const IndexIn_ chunk_size = check_chunk_size<IndexIn_, ColumnIndex_>(options.
chunk_size);
438 return convert_by_row<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, options.
num_threads);
440 return convert_by_column<ColumnIndex_, ValueOut_, IndexOut_>(mat, chunk_size, options.
num_threads);
// NOTE(review): fragment — the signature (missing original lines 449-450) and
// the trailing return (454+) are not visible. The visible body packs loose
// chunk_size/num_threads arguments into a ConvertToLayeredSparseOptions,
// i.e. this reads as a backward-compatibility overload forwarding to the
// options-based entry point — TODO confirm.
448template<
typename ValueOut_ =
double,
typename IndexOut_ =
int,
typename ColumnIndex_ = std::u
int16_t,
typename ValueIn_,
typename IndexIn_>
451 ConvertToLayeredSparseOptions opt;
452 opt.chunk_size = chunk_size;
453 opt.num_threads = num_threads;
458template<
typename ValueOut_ =
double,
typename IndexOut_ =
int,
typename ColumnIndex_ = std::u
int16_t,
typename ValueIn_,
typename IndexIn_>