tatami_stats
Matrix statistics for tatami
grouped_sums.hpp
#ifndef TATAMI_STATS_GROUPED_SUMS_HPP
#define TATAMI_STATS_GROUPED_SUMS_HPP

#include "utils.hpp"
#include "sums.hpp"

#include <vector>
#include <algorithm>
#include <cstddef>
#include <cmath>

#include "tatami/tatami.hpp"
#include "sanisizer/sanisizer.hpp"

/**
 * @file grouped_sums.hpp
 * @brief Compute group-wise sums from a tatami::Matrix.
 */

/**
 * @namespace tatami_stats
 * @brief Functions to compute statistics from a tatami::Matrix.
 */
namespace tatami_stats {

/**
 * @namespace tatami_stats::grouped_sums
 * @brief Compute group-wise sums from a tatami::Matrix.
 */
namespace grouped_sums {

/**
 * @brief Grouped summation options.
 */
struct Options {
    /**
     * Whether to check for NaNs in the input and skip them.
     */
    bool skip_nan = false;

    /**
     * Number of threads to use, see tatami::parallelize() for details.
     */
    int num_threads = 1;
};

/**
 * Compute per-group sums along the rows or columns of a tatami::Matrix.
 *
 * @param row Whether to compute sums for each row; if `false`, sums are computed for each column.
 * @param mat Instance of a `tatami::Matrix`.
 * @param[in] group Pointer to an array of group assignments,
 * of length equal to the number of columns (if `row = true`) or rows (otherwise).
 * Each entry should be a non-negative integer less than `num_groups`.
 * @param num_groups Number of groups.
 * @param[out] output Array of `num_groups` pointers, each pointing to an array of length equal
 * to the number of rows (if `row = true`) or columns (otherwise).
 * On completion, the `g`-th array contains, for each row/column, the sum over all entries assigned to group `g`.
 * @param sopt Summation options.
 */
template<typename Value_, typename Index_, typename Group_, typename Output_>
void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, const Group_* group, std::size_t num_groups, Output_** output, const Options& sopt) {
    Index_ dim = (row ? mat.nrow() : mat.ncol());
    Index_ otherdim = (row ? mat.ncol() : mat.nrow());

    if (mat.sparse()) {
        if (mat.prefer_rows() == row) {
            tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
                auto ext = tatami::consecutive_extractor<true>(mat, row, start, len);
                auto xbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(otherdim);
                auto ibuffer = tatami::create_container_of_Index_size<std::vector<Index_> >(otherdim);
                auto tmp = sanisizer::create<std::vector<Output_> >(num_groups);

                for (Index_ i = 0; i < len; ++i) {
                    auto range = ext->fetch(xbuffer.data(), ibuffer.data());
                    std::fill(tmp.begin(), tmp.end(), static_cast<Output_>(0));

                    internal::nanable_ifelse<Value_>(
                        sopt.skip_nan,
                        [&]() -> void {
                            for (Index_ j = 0; j < range.number; ++j) {
                                auto val = range.value[j];
                                if (!std::isnan(val)) {
                                    tmp[group[range.index[j]]] += val;
                                }
                            }
                        },
                        [&]() -> void {
                            for (Index_ j = 0; j < range.number; ++j) {
                                tmp[group[range.index[j]]] += range.value[j];
                            }
                        }
                    );

                    for (decltype(num_groups) g = 0; g < num_groups; ++g) {
                        output[g][i + start] = tmp[g];
                    }
                }
            }, dim, sopt.num_threads);

        } else {
            // The order of indices within each extracted sparse vector does not
            // affect the result, as the running calculation already fixes the
            // order of addition for each objective vector; so we can safely
            // skip sorting of the sparse indices.
            tatami::Options opt;
            opt.sparse_ordered_index = false;

            tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
                std::vector<sums::RunningSparse<Output_, Value_, Index_> > runners;
                runners.reserve(num_groups);
                std::vector<LocalOutputBuffer<Output_> > local_output;
                local_output.reserve(num_groups);

                for (decltype(num_groups) g = 0; g < num_groups; ++g) {
                    local_output.emplace_back(thread, start, len, output[g]);
                    runners.emplace_back(local_output.back().data(), sopt.skip_nan, start);
                }

                auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, start, len, opt);
                auto xbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(len);
                auto ibuffer = tatami::create_container_of_Index_size<std::vector<Index_> >(len);

                for (Index_ i = 0; i < otherdim; ++i) {
                    auto range = ext->fetch(xbuffer.data(), ibuffer.data());
                    runners[group[i]].add(range.value, range.index, range.number);
                }

                for (decltype(num_groups) g = 0; g < num_groups; ++g) {
                    local_output[g].transfer();
                }
            }, dim, sopt.num_threads);
        }

    } else {
        if (mat.prefer_rows() == row) {
            tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
                auto ext = tatami::consecutive_extractor<false>(mat, row, start, len);
                auto xbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(otherdim);
                auto tmp = sanisizer::create<std::vector<Output_> >(num_groups);

                for (Index_ i = 0; i < len; ++i) {
                    auto ptr = ext->fetch(xbuffer.data());
                    std::fill(tmp.begin(), tmp.end(), static_cast<Output_>(0));

                    internal::nanable_ifelse<Value_>(
                        sopt.skip_nan,
                        [&]() -> void {
                            for (Index_ j = 0; j < otherdim; ++j) {
                                auto val = ptr[j];
                                if (!std::isnan(val)) {
                                    tmp[group[j]] += val;
                                }
                            }
                        },
                        [&]() -> void {
                            for (Index_ j = 0; j < otherdim; ++j) {
                                tmp[group[j]] += ptr[j];
                            }
                        }
                    );

                    for (decltype(num_groups) g = 0; g < num_groups; ++g) {
                        output[g][i + start] = tmp[g];
                    }
                }
            }, dim, sopt.num_threads);

        } else {
            tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
                std::vector<sums::RunningDense<Output_, Value_, Index_> > runners;
                runners.reserve(num_groups);
                std::vector<LocalOutputBuffer<Output_> > local_output;
                local_output.reserve(num_groups);

                for (decltype(num_groups) g = 0; g < num_groups; ++g) {
                    local_output.emplace_back(thread, start, len, output[g]);
                    runners.emplace_back(len, local_output.back().data(), sopt.skip_nan);
                }

                auto xbuffer = tatami::create_container_of_Index_size<std::vector<Value_> >(len);
                auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, start, len);

                for (Index_ i = 0; i < otherdim; ++i) {
                    auto ptr = ext->fetch(xbuffer.data());
                    runners[group[i]].add(ptr);
                }

                for (decltype(num_groups) g = 0; g < num_groups; ++g) {
                    local_output[g].transfer();
                }
            }, dim, sopt.num_threads);
        }
    }
}

// Back-compatibility.
template<typename Value_, typename Index_, typename Group_, typename Output_>
void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* group, std::size_t num_groups, Output_** output, const Options& sopt) {
    apply(row, *p, group, num_groups, output, sopt);
}
/**
 * Compute per-group sums for each row of a tatami::Matrix.
 *
 * @param mat Instance of a `tatami::Matrix`.
 * @param[in] group Pointer to an array of group assignments, of length equal to the number of columns.
 * Each entry should be a non-negative integer less than the total number of groups.
 * @param sopt Summation options.
 *
 * @return Vector of length equal to the number of groups.
 * The `g`-th inner vector contains, for each row, the sum over the columns assigned to group `g`.
 */
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& sopt) {
    auto mydim = mat.nrow();
    auto ngroup = total_groups(group, mat.ncol());

    auto output = sanisizer::create<std::vector<std::vector<Output_> > >(ngroup);
    std::vector<Output_*> ptrs;
    ptrs.reserve(output.size());
    for (auto& o : output) {
        o.resize(mydim);
        ptrs.push_back(o.data());
    }

    apply(true, mat, group, ngroup, ptrs.data(), sopt);
    return output;
}

// Back-compatibility.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& sopt) {
    return by_row<Output_>(*p, group, sopt);
}

// Overload of by_row() that uses the default Options.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
    return by_row<Output_>(mat, group, Options());
}

template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
    return by_row<Output_>(*p, group);
}
/**
 * Compute per-group sums for each column of a tatami::Matrix.
 *
 * @param mat Instance of a `tatami::Matrix`.
 * @param[in] group Pointer to an array of group assignments, of length equal to the number of rows.
 * Each entry should be a non-negative integer less than the total number of groups.
 * @param sopt Summation options.
 *
 * @return Vector of length equal to the number of groups.
 * The `g`-th inner vector contains, for each column, the sum over the rows assigned to group `g`.
 */
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& sopt) {
    auto mydim = mat.ncol();
    auto ngroup = total_groups(group, mat.nrow());

    auto output = sanisizer::create<std::vector<std::vector<Output_> > >(ngroup);
    std::vector<Output_*> ptrs;
    ptrs.reserve(output.size());
    for (auto& o : output) {
        o.resize(mydim);
        ptrs.push_back(o.data());
    }

    apply(false, mat, group, ngroup, ptrs.data(), sopt);
    return output;
}

// Back-compatibility.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& sopt) {
    return by_column<Output_>(*p, group, sopt);
}

// Overload of by_column() that uses the default Options.
template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
    return by_column<Output_>(mat, group, Options());
}

template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
    return by_column<Output_>(*p, group);
}

}

}

#endif
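
For orientation, here is a minimal usage sketch, separate from the header above. It assumes the dense matrix container from the core tatami library (tatami::DenseRowMatrix) and installed include paths of the form "tatami/tatami.hpp" and "tatami_stats/grouped_sums.hpp"; adjust both to your setup. The data and group assignments are illustrative only.

#include "tatami/tatami.hpp"
#include "tatami_stats/grouped_sums.hpp"

#include <vector>
#include <cstddef>
#include <iostream>

int main() {
    // A small 3 x 4 dense matrix, stored in row-major order.
    std::vector<double> values {
        1, 2,  3,  4,
        5, 6,  7,  8,
        9, 10, 11, 12
    };
    tatami::DenseRowMatrix<double, int> mat(3, 4, std::move(values));

    // Assign each of the 4 columns to one of two groups.
    std::vector<int> group { 0, 1, 0, 1 };

    tatami_stats::grouped_sums::Options opt;
    opt.num_threads = 2;

    // sums[g][r] holds the sum of row 'r' over the columns assigned to group 'g'.
    // Here: group 0 -> {4, 12, 20}, group 1 -> {6, 14, 22}.
    auto sums = tatami_stats::grouped_sums::by_row(mat, group.data(), opt);

    for (std::size_t g = 0; g < sums.size(); ++g) {
        std::cout << "group " << g << ":";
        for (auto s : sums[g]) {
            std::cout << " " << s;
        }
        std::cout << "\n";
    }

    return 0;
}

When the caller wants to write into pre-allocated buffers instead of receiving nested vectors, apply() can be called directly with an array of output pointers, which is exactly what by_row() and by_column() do internally.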