tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
sums.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS__SUMS_HPP
2#define TATAMI_STATS__SUMS_HPP
3
4#include "utils.hpp"
5
6#include <vector>
7#include <numeric>
8#include <algorithm>
9
10#include "tatami/tatami.hpp"
11
18namespace tatami_stats {
19
24namespace sums {
25
29struct Options {
34 bool skip_nan = false;
35
40 int num_threads = 1;
41};
42
57template<typename Output_ = double, typename Value_, typename Index_>
58Output_ direct(const Value_* ptr, Index_ num, bool skip_nan) {
59 return internal::nanable_ifelse_with_value<Value_>(
60 skip_nan,
61 [&]() -> Output_ {
62 Output_ sum = 0;
63 for (Index_ i = 0; i < num; ++i) {
64 auto val = ptr[i];
65 if (!std::isnan(val)) {
66 sum += val;
67 }
68 }
69 return sum;
70 },
71 [&]() -> Output_ {
72 return std::accumulate(ptr, ptr + num, static_cast<Output_>(0));
73 }
74 );
75}
76
93template<typename Output_, typename Value_, typename Index_>
95public:
102 RunningDense(Index_ num, Output_* sum, bool skip_nan) : my_num(num), my_sum(sum), my_skip_nan(skip_nan) {}
103
108 void add(const Value_* ptr) {
109 internal::nanable_ifelse<Value_>(
110 my_skip_nan,
111 [&]() -> void {
112 for (Index_ i = 0; i < my_num; ++i) {
113 auto val = ptr[i];
114 if (!std::isnan(val)) {
115 my_sum[i] += val;
116 }
117 }
118 },
119 [&]() -> void {
120 for (Index_ i = 0; i < my_num; ++i) {
121 my_sum[i] += ptr[i];
122 }
123 }
124 );
125 }
126
127private:
128 Index_ my_num;
129 Output_* my_sum;
130 bool my_skip_nan;
131};
132
143template<typename Output_, typename Value_, typename Index_>
145public:
154 RunningSparse(Output_* sum, bool skip_nan, Index_ subtract = 0) :
155 my_sum(sum), my_skip_nan(skip_nan), my_subtract(subtract) {}
156
164 void add(const Value_* value, const Index_* index, Index_ number) {
165 internal::nanable_ifelse<Value_>(
166 my_skip_nan,
167 [&]() -> void {
168 for (Index_ i = 0; i < number; ++i) {
169 auto val = value[i];
170 if (!std::isnan(val)) {
171 my_sum[index[i] - my_subtract] += val;
172 }
173 }
174 },
175 [&]() -> void {
176 for (Index_ i = 0; i < number; ++i) {
177 my_sum[index[i] - my_subtract] += value[i];
178 }
179 }
180 );
181 }
182
183private:
184 Output_* my_sum;
185 bool my_skip_nan;
186 Index_ my_subtract;
187};
188
207template<typename Value_, typename Index_, typename Output_>
208void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* output, const Options& sopt) {
209 auto dim = (row ? mat.nrow() : mat.ncol());
210 auto otherdim = (row ? mat.ncol() : mat.nrow());
211 const bool direct = mat.prefer_rows() == row;
212
213 if (mat.sparse()) {
214 if (direct) {
215 tatami::Options opt;
216 opt.sparse_extract_index = false;
217
218 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
219 auto ext = tatami::consecutive_extractor<true>(mat, row, s, l, opt);
221 for (Index_ x = 0; x < l; ++x) {
222 auto out = ext->fetch(vbuffer.data(), NULL);
223 output[x + s] = sums::direct(out.value, out.number, sopt.skip_nan);
224 }
225 }, dim, sopt.num_threads);
226
227 } else {
228 tatami::Options opt;
229 opt.sparse_ordered_index = false;
230
231 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
232 auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, s, l, opt);
235
236 LocalOutputBuffer<Output_> local_output(thread, s, l, output);
237 sums::RunningSparse<Output_, Value_, Index_> runner(local_output.data(), sopt.skip_nan, s);
238
239 for (Index_ x = 0; x < otherdim; ++x) {
240 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
241 runner.add(out.value, out.index, out.number);
242 }
243
244 local_output.transfer();
245 }, dim, sopt.num_threads);
246 }
247
248 } else {
249 if (direct) {
250 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
251 auto ext = tatami::consecutive_extractor<false>(mat, row, s, l);
253 for (Index_ x = 0; x < l; ++x) {
254 auto out = ext->fetch(buffer.data());
255 output[x + s] = sums::direct(out, otherdim, sopt.skip_nan);
256 }
257 }, dim, sopt.num_threads);
258
259 } else {
260 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
261 auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, s, l);
263
264 LocalOutputBuffer<Output_> local_output(thread, s, l, output);
265 sums::RunningDense<Output_, Value_, Index_> runner(l, local_output.data(), sopt.skip_nan);
266
267 for (Index_ x = 0; x < otherdim; ++x) {
268 auto out = ext->fetch(buffer.data());
269 runner.add(out);
270 }
271
272 local_output.transfer();
273 }, dim, sopt.num_threads);
274 }
275 }
276
277 return;
278}
279
283// Back-compatibility.
284template<typename Value_, typename Index_, typename Output_>
285void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& sopt) {
286 apply(row, *p, output, sopt);
287}
303template<typename Output_ = double, typename Value_, typename Index_>
304std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& sopt) {
306 apply(false, mat, output.data(), sopt);
307 return output;
308}
309
313// Back-compatibility.
314template<typename Output_ = double, typename Value_, typename Index_>
315std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& sopt) {
316 return by_column<Output_>(*p, sopt);
317}
318
319template<typename Output_ = double, typename Value_, typename Index_>
320std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat) {
321 return by_column<Output_>(mat, {});
322}
323
324template<typename Output_ = double, typename Value_, typename Index_>
325std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
326 return by_column<Output_>(*p);
327}
343template<typename Output_ = double, typename Value_, typename Index_>
344std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& sopt) {
346 apply(true, mat, output.data(), sopt);
347 return output;
348}
349
353// Back-compatibility.
354template<typename Output_ = double, typename Value_, typename Index_>
355std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& sopt) {
356 return by_row<Output_>(*p, sopt);
357}
358
359template<typename Output_ = double, typename Value_, typename Index_>
360std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat) {
361 return by_row<Output_>(mat, {});
362}
363
364template<typename Output_ = double, typename Value_, typename Index_>
365std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
366 return by_row<Output_>(*p);
367}
372}
373
374}
375
376#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:82
Running sums from dense data.
Definition sums.hpp:94
void add(const Value_ *ptr)
Definition sums.hpp:108
RunningDense(Index_ num, Output_ *sum, bool skip_nan)
Definition sums.hpp:102
Running sums from sparse data.
Definition sums.hpp:144
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition sums.hpp:164
RunningSparse(Output_ *sum, bool skip_nan, Index_ subtract=0)
Definition sums.hpp:154
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &sopt)
Definition sums.hpp:344
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &sopt)
Definition sums.hpp:208
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &sopt)
Definition sums.hpp:304
Output_ direct(const Value_ *ptr, Index_ num, bool skip_nan)
Definition sums.hpp:58
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Summation options.
Definition sums.hpp:29
int num_threads
Definition sums.hpp:40
bool skip_nan
Definition sums.hpp:34
Utilities for computing matrix statistics.