tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
counts.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_COUNTS_HPP
2#define TATAMI_STATS_COUNTS_HPP
3
4#include "tatami/tatami.hpp"
5#include "subpar/subpar.hpp"
6
7#include <vector>
8#include <algorithm>
9#include <cmath>
10#include <type_traits>
11
18namespace tatami_stats {
19
24namespace counts {
25
43template<typename Value_, typename Index_, typename Output_, class Condition_>
44void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, int num_threads, Condition_ condition) {
45 auto dim = (row ? p->nrow() : p->ncol());
46 auto otherdim = (row ? p->ncol() : p->nrow());
47 std::fill(output, output + dim, 0);
48
49 if (p->prefer_rows() == row) {
50 if (p->sparse()) {
52 opt.sparse_ordered_index = false;
53 bool count_zero = condition(0);
54
55 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
56 std::vector<Value_> xbuffer(otherdim);
57 std::vector<Index_> ibuffer(otherdim);
58 auto ext = tatami::consecutive_extractor<true>(p, row, start, len, opt);
59
60 for (Index_ x = 0; x < len; ++x) {
61 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
62 Output_ target = 0;
63 for (Index_ j = 0; j < range.number; ++j) {
64 target += condition(range.value[j]);
65 }
66 if (count_zero) {
67 target += otherdim - range.number;
68 }
69 output[x + start] = target;
70 }
71 }, dim, num_threads);
72
73 } else {
74 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
75 std::vector<Value_> xbuffer(otherdim);
76 auto ext = tatami::consecutive_extractor<false>(p, row, start, len);
77
78 for (Index_ x = 0; x < len; ++x) {
79 auto ptr = ext->fetch(xbuffer.data());
80 Output_ target = 0;
81 for (Index_ j = 0; j < otherdim; ++j) {
82 target += condition(ptr[j]);
83 }
84 output[x + start] = target;
85 }
86 }, dim, num_threads);
87 }
88
89 } else {
90 num_threads = subpar::sanitize_num_workers(num_threads, otherdim); // provides some protection against silly num_threads iputs.
91 std::vector<Output_*> threaded_output_ptrs(num_threads, output);
92 std::vector<std::vector<Output_> > threaded_output;
93 if (num_threads > 1) {
94 threaded_output.resize(num_threads - 1);
95 for (int t = 1; t < num_threads; ++t) {
96 auto& curout = threaded_output[t - 1];
97 curout.resize(dim);
98 threaded_output_ptrs[t] = curout.data();
99 }
100 }
101
102 if (p->sparse()) {
103 tatami::Options opt;
104 opt.sparse_ordered_index = false;
105 bool count_zero = condition(0);
106
107 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
108 std::vector<Value_> xbuffer(dim);
109 std::vector<Index_> ibuffer(dim);
110 auto ext = tatami::consecutive_extractor<true>(p, !row, start, len, opt);
111
112 auto curoutput = threaded_output_ptrs[thread];
113 std::vector<Index_> nonzeros(dim);
114
115 for (Index_ x = 0; x < len; ++x) {
116 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
117 for (Index_ j = 0; j < range.number; ++j) {
118 auto idx = range.index[j];
119 curoutput[idx] += condition(range.value[j]);
120 ++(nonzeros[idx]);
121 }
122 }
123
124 if (count_zero) {
125 for (int d = 0; d < dim; ++d) {
126 curoutput[d] += len - nonzeros[d];
127 }
128 }
129 }, otherdim, num_threads);
130
131 } else {
132 tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
133 std::vector<Value_> xbuffer(dim);
134 auto ext = tatami::consecutive_extractor<false>(p, !row, start, len);
135 auto curoutput = threaded_output_ptrs[thread];
136
137 for (Index_ x = 0; x < len; ++x) {
138 auto ptr = ext->fetch(xbuffer.data());
139 for (Index_ j = 0; j < dim; ++j) {
140 curoutput[j] += condition(ptr[j]);
141 }
142 }
143 }, otherdim, num_threads);
144 }
145
146 for (int t = 1; t < num_threads; ++t) {
147 auto curoutput = threaded_output_ptrs[t];
148 for (Index_ d = 0; d < dim; ++d) {
149 output[d] += curoutput[d];
150 }
151 }
152 }
153}
154
159namespace nan {
160
/**
 * @brief NaN-counting options.
 */
struct Options {
    /**
     * Number of threads to use; this is forwarded to `counts::apply()`.
     * Defaults to 1.
     */
    int num_threads{1};
};
171
184template<typename Value_, typename Index_, typename Output_>
185void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& nopt) {
186 counts::apply(row, p, output, nopt.num_threads, [](Value_ x) -> bool { return std::isnan(x); });
187}
188
201template<typename Output_ = int, typename Value_, typename Index_>
202std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
203 std::vector<Output_> output(p->nrow());
204 apply(true, p, output.data(), nopt);
205 return output;
206}
207
218template<typename Output_ = int, typename Value_, typename Index_>
219std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
220 return by_row(p, Options());
221}
222
236template<typename Output_ = int, typename Value_, typename Index_>
237std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
238 std::vector<Output_> output(p->ncol());
239 apply(false, p, output.data(), nopt);
240 return output;
241}
242
255template<typename Output_ = int, typename Value_, typename Index_>
256std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
257 return by_column(p, Options());
258}
259
260}
261
266namespace zero {
267
/**
 * @brief Zero-counting options.
 */
struct Options {
    /**
     * Number of threads to use; this is forwarded to `counts::apply()`.
     * Defaults to 1.
     */
    int num_threads{1};
};
278
291template<typename Value_, typename Index_, typename Output_>
292void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& zopt) {
293 counts::apply(row, p, output, zopt.num_threads, [](Value_ x) -> bool { return x == 0; });
294}
295
307template<typename Output_ = int, typename Value_, typename Index_>
308std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
309 std::vector<Output_> output(p->nrow());
310 apply(true, p, output.data(), zopt);
311 return output;
312}
313
326template<typename Output_ = int, typename Value_, typename Index_>
327std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
328 return by_row(p, Options());
329}
330
344template<typename Output_ = int, typename Value_, typename Index_>
345std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
346 std::vector<Output_> output(p->ncol());
347 apply(false, p, output.data(), zopt);
348 return output;
349}
350
361template<typename Output_ = int, typename Value_, typename Index_>
362std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
363 return by_column(p, Options());
364}
365
366}
367
368}
369
370}
371
372#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &nopt)
Definition counts.hpp:202
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const Options &nopt)
Definition counts.hpp:185
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &nopt)
Definition counts.hpp:237
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &zopt)
Definition counts.hpp:308
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &zopt)
Definition counts.hpp:345
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const Options &zopt)
Definition counts.hpp:292
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, int num_threads, Condition_ condition)
Definition counts.hpp:44
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
NaN-counting options.
Definition counts.hpp:164
int num_threads
Definition counts.hpp:169
Zero-counting options.
Definition counts.hpp:271
int num_threads
Definition counts.hpp:276