tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
counts.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_COUNTS_HPP
2#define TATAMI_STATS_COUNTS_HPP
3
4#include "tatami/tatami.hpp"
5#include "subpar/subpar.hpp"
6
7#include <vector>
8#include <algorithm>
9#include <cmath>
10#include <type_traits>
11
18namespace tatami_stats {
19
24namespace counts {
25
43template<typename Value_, typename Index_, typename Output_, class Condition_>
44void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, int num_threads, Condition_ condition) {
45 auto dim = (row ? p->nrow() : p->ncol());
46 auto otherdim = (row ? p->ncol() : p->nrow());
47 std::fill(output, output + dim, 0);
48
49 if (p->prefer_rows() == row) {
50 if (p->sparse()) {
52 opt.sparse_ordered_index = false;
53 bool count_zero = condition(0);
54
55 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
56 std::vector<Value_> xbuffer(otherdim);
57 std::vector<Index_> ibuffer(otherdim);
58 auto ext = tatami::consecutive_extractor<true>(p, row, start, len, opt);
59
60 for (Index_ x = 0; x < len; ++x) {
61 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
62 Output_ target = 0;
63 for (Index_ j = 0; j < range.number; ++j) {
64 target += condition(range.value[j]);
65 }
66 if (count_zero) {
67 target += otherdim - range.number;
68 }
69 output[x + start] = target;
70 }
71 }, dim, num_threads);
72
73 } else {
74 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
75 std::vector<Value_> xbuffer(otherdim);
76 auto ext = tatami::consecutive_extractor<false>(p, row, start, len);
77
78 for (Index_ x = 0; x < len; ++x) {
79 auto ptr = ext->fetch(xbuffer.data());
80 Output_ target = 0;
81 for (Index_ j = 0; j < otherdim; ++j) {
82 target += condition(ptr[j]);
83 }
84 output[x + start] = target;
85 }
86 }, dim, num_threads);
87 }
88
89 } else {
90 num_threads = subpar::sanitize_num_workers(num_threads, otherdim); // provides some protection against silly num_threads iputs.
91 std::vector<Output_*> threaded_output_ptrs(num_threads, output);
92 std::vector<std::vector<Output_> > threaded_output(num_threads - 1);
93 for (int t = 1; t < num_threads; ++t) {
94 auto& curout = threaded_output[t - 1];
95 curout.resize(dim);
96 threaded_output_ptrs[t] = curout.data();
97 }
98
99 if (p->sparse()) {
100 tatami::Options opt;
101 opt.sparse_ordered_index = false;
102 bool count_zero = condition(0);
103
104 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
105 std::vector<Value_> xbuffer(dim);
106 std::vector<Index_> ibuffer(dim);
107 auto ext = tatami::consecutive_extractor<true>(p, !row, start, len, opt);
108
109 auto curoutput = threaded_output_ptrs[thread];
110 std::vector<Index_> nonzeros(dim);
111
112 for (Index_ x = 0; x < len; ++x) {
113 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
114 for (Index_ j = 0; j < range.number; ++j) {
115 auto idx = range.index[j];
116 curoutput[idx] += condition(range.value[j]);
117 ++(nonzeros[idx]);
118 }
119 }
120
121 if (count_zero) {
122 for (int d = 0; d < dim; ++d) {
123 curoutput[d] += len - nonzeros[d];
124 }
125 }
126 }, otherdim, num_threads);
127
128 } else {
129 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
130 std::vector<Value_> xbuffer(dim);
131 auto ext = tatami::consecutive_extractor<false>(p, !row, start, len);
132 auto curoutput = threaded_output_ptrs[thread];
133
134 for (Index_ x = 0; x < len; ++x) {
135 auto ptr = ext->fetch(xbuffer.data());
136 for (Index_ j = 0; j < dim; ++j) {
137 curoutput[j] += condition(ptr[j]);
138 }
139 }
140 }, otherdim, num_threads);
141 }
142
143 for (int t = 1; t < num_threads; ++t) {
144 auto curoutput = threaded_output_ptrs[t];
145 for (Index_ d = 0; d < dim; ++d) {
146 output[d] += curoutput[d];
147 }
148 }
149 }
150}
151
156namespace nan {
157
/**
 * @brief NaN-counting options.
 */
struct Options {
    /**
     * Number of threads to use for the calculation; defaults to 1.
     */
    int num_threads{1};
};
168
181template<typename Value_, typename Index_, typename Output_>
182void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& nopt) {
183 counts::apply(row, p, output, nopt.num_threads, [](Value_ x) -> bool { return std::isnan(x); });
184}
185
198template<typename Output_ = int, typename Value_, typename Index_>
199std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
200 std::vector<Output_> output(p->nrow());
201 apply(true, p, output.data(), nopt);
202 return output;
203}
204
215template<typename Output_ = int, typename Value_, typename Index_>
216std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
217 return by_row(p, Options());
218}
219
233template<typename Output_ = int, typename Value_, typename Index_>
234std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
235 std::vector<Output_> output(p->ncol());
236 apply(false, p, output.data(), nopt);
237 return output;
238}
239
252template<typename Output_ = int, typename Value_, typename Index_>
253std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
254 return by_column(p, Options());
255}
256
257}
258
263namespace zero {
264
/**
 * @brief Zero-counting options.
 */
struct Options {
    /**
     * Number of threads to use for the calculation; defaults to 1.
     */
    int num_threads{1};
};
275
288template<typename Value_, typename Index_, typename Output_>
289void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& zopt) {
290 counts::apply(row, p, output, zopt.num_threads, [](Value_ x) -> bool { return x == 0; });
291}
292
304template<typename Output_ = int, typename Value_, typename Index_>
305std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
306 std::vector<Output_> output(p->nrow());
307 apply(true, p, output.data(), zopt);
308 return output;
309}
310
323template<typename Output_ = int, typename Value_, typename Index_>
324std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
325 return by_row(p, Options());
326}
327
341template<typename Output_ = int, typename Value_, typename Index_>
342std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
343 std::vector<Output_> output(p->ncol());
344 apply(false, p, output.data(), zopt);
345 return output;
346}
347
358template<typename Output_ = int, typename Value_, typename Index_>
359std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
360 return by_column(p, Options());
361}
362
363}
364
365}
366
367}
368
369#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &nopt)
Definition counts.hpp:199
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const Options &nopt)
Definition counts.hpp:182
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &nopt)
Definition counts.hpp:234
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &zopt)
Definition counts.hpp:305
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &zopt)
Definition counts.hpp:342
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const Options &zopt)
Definition counts.hpp:289
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, int num_threads, Condition_ condition)
Definition counts.hpp:44
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
NaN-counting options.
Definition counts.hpp:161
int num_threads
Definition counts.hpp:166
Zero-counting options.
Definition counts.hpp:268
int num_threads
Definition counts.hpp:273