tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
counts.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_COUNTS_HPP
2#define TATAMI_STATS_COUNTS_HPP
3
4#include <vector>
5#include <algorithm>
6#include <cmath>
7#include <type_traits>
8
9#include "tatami/tatami.hpp"
10#include "sanisizer/sanisizer.hpp"
11
18namespace tatami_stats {
19
24namespace counts {
25
44template<typename Value_, typename Index_, typename Output_, class Condition_>
45void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* output, int num_threads, Condition_ condition) {
46 const Index_ dim = (row ? mat.nrow() : mat.ncol());
47 const Index_ otherdim = (row ? mat.ncol() : mat.nrow());
48
49 if (mat.prefer_rows() == row) {
50 if (mat.sparse()) {
52 opt.sparse_ordered_index = false;
53 const bool count_zero = condition(0);
54
55 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
58 auto ext = tatami::consecutive_extractor<true>(mat, row, start, len, opt);
59
60 for (Index_ x = 0; x < len; ++x) {
61 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
62 Output_ target = 0;
63 for (Index_ j = 0; j < range.number; ++j) {
64 target += condition(range.value[j]);
65 }
66 if (count_zero) {
67 target += otherdim - range.number;
68 }
69 output[x + start] = target;
70 }
71 }, dim, num_threads);
72
73 } else {
74 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
76 auto ext = tatami::consecutive_extractor<false>(mat, row, start, len);
77
78 for (Index_ x = 0; x < len; ++x) {
79 auto ptr = ext->fetch(xbuffer.data());
80 Output_ target = 0;
81 for (Index_ j = 0; j < otherdim; ++j) {
82 target += condition(ptr[j]);
83 }
84 output[x + start] = target;
85 }
86 }, dim, num_threads);
87 }
88
89 } else {
90 // Directly write the result to the output buffer for the first thread, everything else goes into these temporary vectors.
91 auto threaded_output = sanisizer::create<std::vector<std::optional<std::vector<Output_> > > >(num_threads <= 1 ? 0 : num_threads - 1);
92 const auto get_output_ptr = [&](const int thread, std::optional<std::vector<Output_> >& tmp_output) -> Output_* {
93 if (thread) {
94 tmp_output.emplace(tatami::cast_Index_to_container_size<std::vector<Output_> >(dim));
95 return tmp_output->data();
96 } else {
97 return output;
98 }
99 };
100 const auto save_output = [&](const int thread, std::optional<std::vector<Output_> >& tmp_output) -> void {
101 if (thread) {
102 threaded_output[thread - 1] = std::move(tmp_output);
103 }
104 };
105
106 int num_used;
107 if (mat.sparse()) {
108 tatami::Options opt;
109 opt.sparse_ordered_index = false;
110 bool count_zero = condition(0);
111
112 num_used = tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
113 std::optional<std::vector<Output_> > tmp_output;
114 auto curoutput = get_output_ptr(thread, tmp_output);
115
118 auto ext = tatami::consecutive_extractor<true>(mat, !row, start, len, opt);
120
121 for (Index_ x = 0; x < len; ++x) {
122 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
123 for (Index_ j = 0; j < range.number; ++j) {
124 auto idx = range.index[j];
125 curoutput[idx] += condition(range.value[j]);
126 ++(nonzeros[idx]);
127 }
128 }
129
130 if (count_zero) {
131 for (Index_ d = 0; d < dim; ++d) {
132 curoutput[d] += len - nonzeros[d];
133 }
134 }
135
136 save_output(thread, tmp_output);
137 }, otherdim, num_threads);
138
139 } else {
140 num_used = tatami::parallelize([&](int thread, Index_ start, Index_ len) -> void {
141 std::optional<std::vector<Output_> > tmp_output;
142 auto curoutput = get_output_ptr(thread, tmp_output);
143
145 auto ext = tatami::consecutive_extractor<false>(mat, !row, start, len);
146
147 for (Index_ x = 0; x < len; ++x) {
148 auto ptr = ext->fetch(xbuffer.data());
149 for (Index_ j = 0; j < dim; ++j) {
150 curoutput[j] += condition(ptr[j]);
151 }
152 }
153
154 save_output(thread, tmp_output);
155 }, otherdim, num_threads);
156 }
157
158 if (num_used == 0) {
159 // Make sure we reset it if no workers were run.
160 std::fill(output, output + dim, 0);
161 } else {
162 for (int thread = 1; thread < num_used; ++thread) {
163 const auto& curout = *(threaded_output[thread - 1]);
164 for (Index_ d = 0; d < dim; ++d) {
165 output[d] += curout[d];
166 }
167 }
168 }
169 }
170}
171
175// Back-compatibility only.
176template<typename Value_, typename Index_, typename Output_, class Condition_>
177void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, int num_threads, Condition_ condition) {
178 apply(row, *p, output, num_threads, std::move(condition));
179}
188namespace nan {
189
193struct Options {
198 int num_threads = 1;
199
200};
201
214template<typename Value_, typename Index_, typename Output_>
215void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* output, const Options& nopt) {
216 counts::apply(row, mat, output, nopt.num_threads, [](Value_ x) -> bool { return std::isnan(x); });
217}
218
222// Back-compatibility only.
223template<typename Value_, typename Index_, typename Output_>
224void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& nopt) {
225 apply(row, *p, output, nopt);
226}
244template<typename Output_ = int, typename Value_, typename Index_>
245std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& nopt) {
247 apply(true, mat, output.data(), nopt);
248 return output;
249}
250
254// Back-compatibility only.
255template<typename Output_ = int, typename Value_, typename Index_>
256std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
257 return by_row<Output_>(*p, nopt);
258}
274template<typename Output_ = int, typename Value_, typename Index_>
275std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat) {
276 return by_row<Output_>(mat, Options());
277}
278
282// Back-compatibility only.
283template<typename Output_ = int, typename Value_, typename Index_>
284std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
285 return by_row<Output_>(*p);
286}
304template<typename Output_ = int, typename Value_, typename Index_>
305std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& nopt) {
307 apply(false, mat, output.data(), nopt);
308 return output;
309}
310
314// Back-compatibility only.
315template<typename Output_ = int, typename Value_, typename Index_>
316std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& nopt) {
317 return by_column<Output_>(*p, nopt);
318}
335template<typename Output_ = int, typename Value_, typename Index_>
336std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat) {
337 return by_column<Output_>(mat, Options());
338}
339
343// Back-compatibility only.
344template<typename Output_ = int, typename Value_, typename Index_>
345std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
346 return by_column<Output_>(*p);
347}
352}
353
358namespace zero {
359
363struct Options {
368 int num_threads = 1;
369};
370
383template<typename Value_, typename Index_, typename Output_>
384void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* output, const Options& zopt) {
385 counts::apply(row, mat, output, zopt.num_threads, [](Value_ x) -> bool { return x == 0; });
386}
387
391// Back-compatibility.
392template<typename Value_, typename Index_, typename Output_>
393void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const Options& zopt) {
394 apply(row, *p, output, zopt);
395}
411template<typename Output_ = int, typename Value_, typename Index_>
412std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& zopt) {
414 apply(true, mat, output.data(), zopt);
415 return output;
416}
417
421// Back-compatibility.
422template<typename Output_ = int, typename Value_, typename Index_>
423std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
424 return by_row<Output_>(*p, zopt);
425}
442template<typename Output_ = int, typename Value_, typename Index_>
443std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat) {
444 return by_row<Output_>(mat, Options());
445}
446
450// Back-compatibility.
451template<typename Output_ = int, typename Value_, typename Index_>
452std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
453 return by_row<Output_>(*p);
454}
472template<typename Output_ = int, typename Value_, typename Index_>
473std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& zopt) {
475 apply(false, mat, output.data(), zopt);
476 return output;
477}
478
482// Back-compatibility.
483template<typename Output_ = int, typename Value_, typename Index_>
484std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& zopt) {
485 return by_column<Output_>(*p, zopt);
486}
502template<typename Output_ = int, typename Value_, typename Index_>
503std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat) {
504 return by_column<Output_>(mat, Options());
505}
506
510// Back-compatibility.
511template<typename Output_ = int, typename Value_, typename Index_>
512std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
513 return by_column<Output_>(*p);
514}
519}
520
521}
522
523}
524
525#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &nopt)
Definition counts.hpp:245
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &nopt)
Definition counts.hpp:305
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &nopt)
Definition counts.hpp:215
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &zopt)
Definition counts.hpp:384
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &zopt)
Definition counts.hpp:473
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &zopt)
Definition counts.hpp:412
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, int num_threads, Condition_ condition)
Definition counts.hpp:45
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
int parallelize(Function_ fun, const Index_ tasks, const int workers)
I< decltype(std::declval< Container_ >().size())> cast_Index_to_container_size(const Index_ x)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
NaN-counting options.
Definition counts.hpp:193
int num_threads
Definition counts.hpp:198
Zero-counting options.
Definition counts.hpp:363
int num_threads
Definition counts.hpp:368