tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
sums.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS__SUMS_HPP
2#define TATAMI_STATS__SUMS_HPP
3
4#include "utils.hpp"
5
6#include <vector>
7#include <numeric>
8#include <algorithm>
9
10#include "tatami/tatami.hpp"
11
18namespace tatami_stats {
19
24namespace sums {
25
29struct Options {
34 bool skip_nan = false;
35
40 int num_threads = 1;
41};
42
58template<typename Output_ = double, typename Value_, typename Index_>
59Output_ direct(const Value_* ptr, Index_ num, bool skip_nan) {
60 return internal::nanable_ifelse_with_value<Value_>(
61 skip_nan,
62 [&]() -> Output_ {
63 Output_ sum = 0;
64 for (Index_ i = 0; i < num; ++i) {
65 auto val = ptr[i];
66 if (!std::isnan(val)) {
67 sum += val;
68 }
69 }
70 return sum;
71 },
72 [&]() -> Output_ {
73 return std::accumulate(ptr, ptr + num, static_cast<Output_>(0));
74 }
75 );
76}
77
95template<typename Output_, typename Value_, typename Index_>
97public:
104 RunningDense(Index_ num, Output_* sum, bool skip_nan) : my_num(num), my_sum(sum), my_skip_nan(skip_nan) {}
105
110 void add(const Value_* ptr) {
111 internal::nanable_ifelse<Value_>(
112 my_skip_nan,
113 [&]() -> void {
114 for (Index_ i = 0; i < my_num; ++i) {
115 auto val = ptr[i];
116 if (!std::isnan(val)) {
117 my_sum[i] += val;
118 }
119 }
120 },
121 [&]() -> void {
122 for (Index_ i = 0; i < my_num; ++i) {
123 my_sum[i] += ptr[i];
124 }
125 }
126 );
127 }
128
129private:
130 Index_ my_num;
131 Output_* my_sum;
132 bool my_skip_nan;
133};
134
146template<typename Output_, typename Value_, typename Index_>
148public:
157 RunningSparse(Output_* sum, bool skip_nan, Index_ subtract = 0) :
158 my_sum(sum), my_skip_nan(skip_nan), my_subtract(subtract) {}
159
167 void add(const Value_* value, const Index_* index, Index_ number) {
168 internal::nanable_ifelse<Value_>(
169 my_skip_nan,
170 [&]() -> void {
171 for (Index_ i = 0; i < number; ++i) {
172 auto val = value[i];
173 if (!std::isnan(val)) {
174 my_sum[index[i] - my_subtract] += val;
175 }
176 }
177 },
178 [&]() -> void {
179 for (Index_ i = 0; i < number; ++i) {
180 my_sum[index[i] - my_subtract] += value[i];
181 }
182 }
183 );
184 }
185
186private:
187 Output_* my_sum;
188 bool my_skip_nan;
189 Index_ my_subtract;
190};
191
211template<typename Value_, typename Index_, typename Output_>
212void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* output, const Options& sopt) {
213 auto dim = (row ? mat.nrow() : mat.ncol());
214 auto otherdim = (row ? mat.ncol() : mat.nrow());
215 const bool direct = mat.prefer_rows() == row;
216
217 if (mat.sparse()) {
218 if (direct) {
219 tatami::Options opt;
220 opt.sparse_extract_index = false;
221
222 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
223 auto ext = tatami::consecutive_extractor<true>(mat, row, s, l, opt);
225 for (Index_ x = 0; x < l; ++x) {
226 auto out = ext->fetch(vbuffer.data(), NULL);
227 output[x + s] = sums::direct(out.value, out.number, sopt.skip_nan);
228 }
229 }, dim, sopt.num_threads);
230
231 } else {
232 tatami::Options opt;
233 opt.sparse_ordered_index = false;
234
235 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
236 auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, s, l, opt);
239
240 LocalOutputBuffer<Output_> local_output(thread, s, l, output);
241 sums::RunningSparse<Output_, Value_, Index_> runner(local_output.data(), sopt.skip_nan, s);
242
243 for (Index_ x = 0; x < otherdim; ++x) {
244 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
245 runner.add(out.value, out.index, out.number);
246 }
247
248 local_output.transfer();
249 }, dim, sopt.num_threads);
250 }
251
252 } else {
253 if (direct) {
254 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
255 auto ext = tatami::consecutive_extractor<false>(mat, row, s, l);
257 for (Index_ x = 0; x < l; ++x) {
258 auto out = ext->fetch(buffer.data());
259 output[x + s] = sums::direct(out, otherdim, sopt.skip_nan);
260 }
261 }, dim, sopt.num_threads);
262
263 } else {
264 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
265 auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, s, l);
267
268 LocalOutputBuffer<Output_> local_output(thread, s, l, output);
269 sums::RunningDense<Output_, Value_, Index_> runner(l, local_output.data(), sopt.skip_nan);
270
271 for (Index_ x = 0; x < otherdim; ++x) {
272 auto out = ext->fetch(buffer.data());
273 runner.add(out);
274 }
275
276 local_output.transfer();
277 }, dim, sopt.num_threads);
278 }
279 }
280
281 return;
282}
283
287// Back-compatibility.
288template<typename Value_, typename Index_, typename Output_>
289void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& sopt) {
290 apply(row, *p, output, sopt);
291}
308template<typename Output_ = double, typename Value_, typename Index_>
309std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& sopt) {
311 apply(false, mat, output.data(), sopt);
312 return output;
313}
314
318// Back-compatibility.
319template<typename Output_ = double, typename Value_, typename Index_>
320std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& sopt) {
321 return by_column<Output_>(*p, sopt);
322}
323
324template<typename Output_ = double, typename Value_, typename Index_>
325std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat) {
326 return by_column<Output_>(mat, {});
327}
328
329template<typename Output_ = double, typename Value_, typename Index_>
330std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
331 return by_column<Output_>(*p);
332}
349template<typename Output_ = double, typename Value_, typename Index_>
350std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& sopt) {
352 apply(true, mat, output.data(), sopt);
353 return output;
354}
355
359// Back-compatibility.
360template<typename Output_ = double, typename Value_, typename Index_>
361std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& sopt) {
362 return by_row<Output_>(*p, sopt);
363}
364
365template<typename Output_ = double, typename Value_, typename Index_>
366std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat) {
367 return by_row<Output_>(mat, {});
368}
369
370template<typename Output_ = double, typename Value_, typename Index_>
371std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
372 return by_row<Output_>(*p);
373}
378}
379
380}
381
382#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:93
Running sums from dense data.
Definition sums.hpp:96
void add(const Value_ *ptr)
Definition sums.hpp:110
RunningDense(Index_ num, Output_ *sum, bool skip_nan)
Definition sums.hpp:104
Running sums from sparse data.
Definition sums.hpp:147
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition sums.hpp:167
RunningSparse(Output_ *sum, bool skip_nan, Index_ subtract=0)
Definition sums.hpp:157
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &sopt)
Definition sums.hpp:350
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &sopt)
Definition sums.hpp:212
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &sopt)
Definition sums.hpp:309
Output_ direct(const Value_ *ptr, Index_ num, bool skip_nan)
Definition sums.hpp:59
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Summation options.
Definition sums.hpp:29
int num_threads
Definition sums.hpp:40
bool skip_nan
Definition sums.hpp:34
Utilities for computing matrix statistics.