tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
grouped_sums.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_GROUPED_SUMS_HPP
2#define TATAMI_STATS_GROUPED_SUMS_HPP
3
4#include "utils.hpp"
5#include "tatami/tatami.hpp"
6#include "sums.hpp"
7#include <vector>
8#include <algorithm>
9
16namespace tatami_stats {
17
22namespace grouped_sums {
23
/**
 * @brief Grouped summation options.
 */
struct Options {
    /**
     * Whether to check for NaNs in the input, and skip them.
     * If false, the sums may be NaN if any NaN values are present.
     */
    bool skip_nan = false;

    /**
     * Number of threads to use when computing the sums.
     * Parallelization is performed via tatami::parallelize().
     */
    int num_threads = 1;
};
40
63template<typename Value_, typename Index_, typename Group_, typename Output_>
64void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* group, size_t num_groups, Output_** output, const Options& sopt) {
65 Index_ dim = (row ? p->nrow() : p->ncol());
66 Index_ otherdim = (row ? p->ncol() : p->nrow());
67
68 if (p->sparse()) {
69 if (p->prefer_rows() == row) {
70 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
71 auto ext = tatami::consecutive_extractor<true>(p, row, start, len);
72 std::vector<Value_> xbuffer(otherdim);
73 std::vector<Index_> ibuffer(otherdim);
74 std::vector<Output_> tmp(num_groups);
75
76 for (Index_ i = 0; i < len; ++i) {
77 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
78 std::fill(tmp.begin(), tmp.end(), static_cast<Output_>(0));
79
80 internal::nanable_ifelse<Value_>(
81 sopt.skip_nan,
82 [&]() {
83 for (int j = 0; j < range.number; ++j) {
84 auto val = range.value[j];
85 if (!std::isnan(val)) {
86 tmp[group[range.index[j]]] += val;
87 }
88 }
89 },
90 [&]() {
91 for (int j = 0; j < range.number; ++j) {
92 tmp[group[range.index[j]]] += range.value[j];
93 }
94 }
95 );
96
97 for (size_t g = 0; g < num_groups; ++g) {
98 output[g][i + start] = tmp[g];
99 }
100 }
101 }, dim, sopt.num_threads);
102
103 } else {
104 // Order within each observed vector doesn't affect numerical
105 // precision of the outcome, as addition order for each objective
106 // vector is already well-defined for a running calculation.
107 tatami::Options opt;
108 opt.sparse_ordered_index = false;
109
110 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
111 std::vector<sums::RunningSparse<Output_, Value_, Index_> > runners;
112 runners.reserve(num_groups);
113 std::vector<LocalOutputBuffer<Output_> > local_output;
114 local_output.reserve(num_groups);
115
116 for (size_t g = 0; g < num_groups; ++g) {
117 local_output.emplace_back(thread, start, len, output[g]);
118 runners.emplace_back(local_output.back().data(), sopt.skip_nan, start);
119 }
120
121 auto ext = tatami::consecutive_extractor<true>(p, !row, static_cast<Index_>(0), otherdim, start, len, opt);
122 std::vector<Value_> xbuffer(len);
123 std::vector<Index_> ibuffer(len);
124
125 for (int i = 0; i < otherdim; ++i) {
126 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
127 runners[group[i]].add(range.value, range.index, range.number);
128 }
129
130 for (size_t g = 0; g < num_groups; ++g) {
131 local_output[g].transfer();
132 }
133 }, dim, sopt.num_threads);
134 }
135
136 } else {
137 if (p->prefer_rows() == row) {
138 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
139 auto ext = tatami::consecutive_extractor<false>(p, row, start, len);
140 std::vector<Value_> xbuffer(otherdim);
141 std::vector<Output_> tmp(num_groups);
142
143 for (Index_ i = 0; i < len; ++i) {
144 auto ptr = ext->fetch(xbuffer.data());
145 std::fill(tmp.begin(), tmp.end(), static_cast<Output_>(0));
146
147 internal::nanable_ifelse<Value_>(
148 sopt.skip_nan,
149 [&]() {
150 for (Index_ j = 0; j < otherdim; ++j) {
151 auto val = ptr[j];
152 if (!std::isnan(val)) {
153 tmp[group[j]] += val;
154 }
155 }
156 },
157 [&]() {
158 for (Index_ j = 0; j < otherdim; ++j) {
159 tmp[group[j]] += ptr[j];
160 }
161 }
162 );
163
164 for (size_t g = 0; g < num_groups; ++g) {
165 output[g][i + start] = tmp[g];
166 }
167 }
168 }, dim, sopt.num_threads);
169
170 } else {
171 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
172 std::vector<sums::RunningDense<Output_, Value_, Index_> > runners;
173 runners.reserve(num_groups);
174 std::vector<LocalOutputBuffer<Output_> > local_output;
175 local_output.reserve(num_groups);
176
177 for (size_t g = 0; g < num_groups; ++g) {
178 local_output.emplace_back(thread, start, len, output[g]);
179 runners.emplace_back(len, local_output.back().data(), sopt.skip_nan);
180 }
181
182 std::vector<Value_> xbuffer(len);
183 auto ext = tatami::consecutive_extractor<false>(p, !row, static_cast<Index_>(0), otherdim, start, len);
184
185 for (int i = 0; i < otherdim; ++i) {
186 auto ptr = ext->fetch(xbuffer.data());
187 runners[group[i]].add(ptr);
188 }
189
190 for (size_t g = 0; g < num_groups; ++g) {
191 local_output[g].transfer();
192 }
193 }, dim, sopt.num_threads);
194 }
195 }
196}
197
215template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
216std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& sopt) {
217 size_t mydim = p->nrow();
218 auto ngroup = total_groups(group, p->ncol());
219
220 std::vector<std::vector<Output_> > output(ngroup);
221 std::vector<Output_*> ptrs;
222 ptrs.reserve(output.size());
223 for (auto& o : output) {
224 o.resize(mydim);
225 ptrs.push_back(o.data());
226 }
227
228 apply(true, p, group, ngroup, ptrs.data(), sopt);
229 return output;
230}
231
248template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
249std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
250 return by_row(p, group, Options());
251}
252
270template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
271std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& sopt) {
272 size_t mydim = p->ncol();
273 auto ngroup = total_groups(group, p->nrow());
274
275 std::vector<std::vector<Output_> > output(ngroup);
276 std::vector<Output_*> ptrs;
277 ptrs.reserve(output.size());
278 for (auto& o : output) {
279 o.resize(mydim);
280 ptrs.push_back(o.data());
281 }
282
283 apply(false, p, group, ngroup, ptrs.data(), sopt);
284 return output;
285}
286
303template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
304std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
305 return by_column(p, group, Options());
306}
307
308}
309
310}
311
312#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, const Group_ *group, size_t num_groups, Output_ **output, const Options &sopt)
Definition grouped_sums.hpp:64
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
size_t total_groups(const Group_ *group, size_t n)
Definition utils.hpp:28
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
Grouped summation options.
Definition grouped_sums.hpp:27
bool skip_nan
Definition grouped_sums.hpp:32
int num_threads
Definition grouped_sums.hpp:38
Compute row and column sums from a tatami::Matrix.
Utilities for computing matrix statistics.