tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
grouped_sums.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_GROUPED_SUMS_HPP
2#define TATAMI_STATS_GROUPED_SUMS_HPP
3
10
17namespace tatami_stats {
18
23namespace grouped_sums {
24
/**
 * @brief Options for grouped summation.
 */
struct Options {
    /**
     * Whether to check for NaNs in the input, and skip them when computing
     * each sum. If false, any NaN in the input propagates to its group's sum.
     */
    bool skip_nan = false;

    /**
     * Number of threads to use, passed to `tatami::parallelize()`.
     */
    int num_threads = 1;
};
41
64template<typename Value_, typename Index_, typename Group_, typename Output_>
65void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, const Group_* group, std::size_t num_groups, Output_** output, const Options& sopt) {
66 Index_ dim = (row ? mat.nrow() : mat.ncol());
67 Index_ otherdim = (row ? mat.ncol() : mat.nrow());
68
69 if (mat.sparse()) {
70 if (mat.prefer_rows() == row) {
71 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
72 auto ext = tatami::consecutive_extractor<true>(mat, row, start, len);
73 std::vector<Value_> xbuffer(otherdim);
74 std::vector<Index_> ibuffer(otherdim);
75 std::vector<Output_> tmp(num_groups);
76
77 for (Index_ i = 0; i < len; ++i) {
78 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
79 std::fill(tmp.begin(), tmp.end(), static_cast<Output_>(0));
80
81 internal::nanable_ifelse<Value_>(
82 sopt.skip_nan,
83 [&]() -> void {
84 for (int j = 0; j < range.number; ++j) {
85 auto val = range.value[j];
86 if (!std::isnan(val)) {
87 tmp[group[range.index[j]]] += val;
88 }
89 }
90 },
91 [&]() -> void {
92 for (int j = 0; j < range.number; ++j) {
93 tmp[group[range.index[j]]] += range.value[j];
94 }
95 }
96 );
97
98 for (decltype(num_groups) g = 0; g < num_groups; ++g) {
99 output[g][i + start] = tmp[g];
100 }
101 }
102 }, dim, sopt.num_threads);
103
104 } else {
105 // Order within each observed vector doesn't affect numerical
106 // precision of the outcome, as addition order for each objective
107 // vector is already well-defined for a running calculation.
108 tatami::Options opt;
109 opt.sparse_ordered_index = false;
110
111 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
112 std::vector<sums::RunningSparse<Output_, Value_, Index_> > runners;
113 runners.reserve(num_groups);
114 std::vector<LocalOutputBuffer<Output_> > local_output;
115 local_output.reserve(num_groups);
116
117 for (decltype(num_groups) g = 0; g < num_groups; ++g) {
118 local_output.emplace_back(thread, start, len, output[g]);
119 runners.emplace_back(local_output.back().data(), sopt.skip_nan, start);
120 }
121
122 auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, start, len, opt);
123 std::vector<Value_> xbuffer(len);
124 std::vector<Index_> ibuffer(len);
125
126 for (int i = 0; i < otherdim; ++i) {
127 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
128 runners[group[i]].add(range.value, range.index, range.number);
129 }
130
131 for (decltype(num_groups) g = 0; g < num_groups; ++g) {
132 local_output[g].transfer();
133 }
134 }, dim, sopt.num_threads);
135 }
136
137 } else {
138 if (mat.prefer_rows() == row) {
139 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
140 auto ext = tatami::consecutive_extractor<false>(mat, row, start, len);
141 std::vector<Value_> xbuffer(otherdim);
142 std::vector<Output_> tmp(num_groups);
143
144 for (Index_ i = 0; i < len; ++i) {
145 auto ptr = ext->fetch(xbuffer.data());
146 std::fill(tmp.begin(), tmp.end(), static_cast<Output_>(0));
147
148 internal::nanable_ifelse<Value_>(
149 sopt.skip_nan,
150 [&]() -> void {
151 for (Index_ j = 0; j < otherdim; ++j) {
152 auto val = ptr[j];
153 if (!std::isnan(val)) {
154 tmp[group[j]] += val;
155 }
156 }
157 },
158 [&]() -> void {
159 for (Index_ j = 0; j < otherdim; ++j) {
160 tmp[group[j]] += ptr[j];
161 }
162 }
163 );
164
165 for (decltype(num_groups) g = 0; g < num_groups; ++g) {
166 output[g][i + start] = tmp[g];
167 }
168 }
169 }, dim, sopt.num_threads);
170
171 } else {
172 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
173 std::vector<sums::RunningDense<Output_, Value_, Index_> > runners;
174 runners.reserve(num_groups);
175 std::vector<LocalOutputBuffer<Output_> > local_output;
176 local_output.reserve(num_groups);
177
178 for (decltype(num_groups) g = 0; g < num_groups; ++g) {
179 local_output.emplace_back(thread, start, len, output[g]);
180 runners.emplace_back(len, local_output.back().data(), sopt.skip_nan);
181 }
182
183 std::vector<Value_> xbuffer(len);
184 auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, start, len);
185
186 for (int i = 0; i < otherdim; ++i) {
187 auto ptr = ext->fetch(xbuffer.data());
188 runners[group[i]].add(ptr);
189 }
190
191 for (decltype(num_groups) g = 0; g < num_groups; ++g) {
192 local_output[g].transfer();
193 }
194 }, dim, sopt.num_threads);
195 }
196 }
197}
198
// Back-compatibility.
// Pointer-based overload; dereferences and forwards to the reference-based apply().
template<typename Value_, typename Index_, typename Group_, typename Output_>
void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* group, std::size_t num_groups, Output_** output, const Options& sopt) {
    apply(row, *p, group, num_groups, output, sopt);
}
228template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
229std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& sopt) {
230 auto mydim = mat.nrow();
231 auto ngroup = total_groups(group, mat.ncol());
232
233 std::vector<std::vector<Output_> > output(ngroup);
234 std::vector<Output_*> ptrs;
235 ptrs.reserve(output.size());
236 for (auto& o : output) {
237 o.resize(mydim);
238 ptrs.push_back(o.data());
239 }
240
241 apply(true, mat, group, ngroup, ptrs.data(), sopt);
242 return output;
243}
244
// Back-compatibility.
// Pointer-based overload; dereferences and forwards to the reference-based by_row().
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& sopt) {
    return by_row<Output_>(*p, group, sopt);
}
253
// Overload of by_row() that uses the default Options.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
    return by_row<Output_>(mat, group, Options());
}
258
// Pointer-based overload of by_row() that uses the default Options.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
    return by_row<Output_>(*p, group);
}
284template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
285std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& sopt) {
286 auto mydim = mat.ncol();
287 auto ngroup = total_groups(group, mat.nrow());
288
289 std::vector<std::vector<Output_> > output(ngroup);
290 std::vector<Output_*> ptrs;
291 ptrs.reserve(output.size());
292 for (auto& o : output) {
293 o.resize(mydim);
294 ptrs.push_back(o.data());
295 }
296
297 apply(false, mat, group, ngroup, ptrs.data(), sopt);
298 return output;
299}
300
// Back-compatibility.
// Pointer-based overload; dereferences and forwards to the reference-based by_column().
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& sopt) {
    return by_column<Output_>(*p, group, sopt);
}
309
// Overload of by_column() that uses the default Options.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
    return by_column<Output_>(mat, group, Options());
}
314
// Pointer-based overload of by_column() that uses the default Options.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
    return by_column<Output_>(*p, group);
}
323}
324
325}
326
327#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &nopt)
Definition counts.hpp:229
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &nopt)
Definition counts.hpp:288
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, int num_threads, Condition_ condition)
Definition counts.hpp:44
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, std::size_t num_groups, Output_ **output, const Options &sopt)
Definition grouped_sums.hpp:65
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
std::size_t total_groups(const Group_ *group, std::size_t n)
Definition utils.hpp:29
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
Grouped summation options.
Definition grouped_sums.hpp:28
bool skip_nan
Definition grouped_sums.hpp:33
int num_threads
Definition grouped_sums.hpp:39
Compute row and column sums from a tatami::Matrix.
Utilities for computing matrix statistics.