tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
medians.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_MEDIANS_HPP
2#define TATAMI_STATS_MEDIANS_HPP
3
4#include "utils.hpp"
5
6#include <cmath>
7#include <vector>
8#include <algorithm>
9#include <limits>
10
11#include "tatami/tatami.hpp"
12#include "sanisizer/sanisizer.hpp"
13#include "quickstats/quickstats.hpp"
14
21namespace tatami_stats {
22
27namespace medians {
28
32struct Options {
37 bool skip_nan = false;
38
43 int num_threads = 1;
44};
45
49template<typename Output_ = double, typename Value_, typename Index_>
50Output_ direct(Value_* ptr, Index_ num, bool skip_nan) {
51 ::tatami_stats::internal::nanable_ifelse<Value_>(
52 skip_nan,
53 [&]() -> void {
54 auto lost = shift_nans(ptr, num);
55 ptr += lost;
56 num -= lost;
57 },
58 []() -> void {}
59 );
60
61 return quickstats::median<Output_>(num, ptr);
62}
63
64template<typename Output_ = double, typename Value_, typename Index_>
65Output_ direct(Value_* value, Index_ num_nonzero, Index_ num_all, bool skip_nan) {
66 ::tatami_stats::internal::nanable_ifelse<Value_>(
67 skip_nan,
68 [&]() -> void {
69 auto lost = shift_nans(value, num_nonzero);
70 value += lost;
71 num_nonzero -= lost;
72 num_all -= lost;
73 },
74 []() -> void {}
75 );
76
77 return quickstats::median<Output_>(num_all, num_nonzero, value);
78}
98template<typename Value_, typename Index_, typename Output_>
99void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* output, const medians::Options& mopt) {
100 auto dim = (row ? mat.nrow() : mat.ncol());
101 auto otherdim = (row ? mat.ncol() : mat.nrow());
102
103 if (mat.sparse()) {
104 tatami::Options opt;
105 opt.sparse_extract_index = false;
106 opt.sparse_ordered_index = false; // we'll be sorting by value anyway.
107
108 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
109 auto ext = tatami::consecutive_extractor<true>(mat, row, s, l, opt);
111 auto vbuffer = buffer.data();
112 for (Index_ x = 0; x < l; ++x) {
113 auto range = ext->fetch(vbuffer, NULL);
114 tatami::copy_n(range.value, range.number, vbuffer);
115 output[x + s] = direct<Output_>(vbuffer, range.number, otherdim, mopt.skip_nan);
116 }
117 }, dim, mopt.num_threads);
118
119 } else {
120 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
122 auto ext = tatami::consecutive_extractor<false>(mat, row, s, l);
123 for (Index_ x = 0; x < l; ++x) {
124 auto ptr = ext->fetch(buffer.data());
125 tatami::copy_n(ptr, otherdim, buffer.data());
126 output[x + s] = direct<Output_>(buffer.data(), otherdim, mopt.skip_nan);
127 }
128 }, dim, mopt.num_threads);
129 }
130}
131
135// Back-compatibility.
136template<typename Value_, typename Index_, typename Output_>
137void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* output, const medians::Options& mopt) {
138 apply(row, *p, output, mopt);
139}
157template<typename Output_ = double, typename Value_, typename Index_>
158std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& mopt) {
160 apply(false, mat, output.data(), mopt);
161 return output;
162}
163
167// Back-compatibility.
168template<typename Output_ = double, typename Value_, typename Index_>
169std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p, const Options& mopt) {
170 return by_column<Output_>(*p, mopt);
171}
172
173template<typename Output_ = double, typename Value_, typename Index_>
174std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>& mat) {
175 return by_column<Output_>(mat, Options());
176}
177
178template<typename Output_ = double, typename Value_, typename Index_>
179std::vector<Output_> by_column(const tatami::Matrix<Value_, Index_>* p) {
180 return by_column<Output_>(*p);
181}
199template<typename Output_ = double, typename Value_, typename Index_>
200std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& mopt) {
202 apply(true, mat, output.data(), mopt);
203 return output;
204}
205
209// Back-compatibility.
210template<typename Output_ = double, typename Value_, typename Index_>
211std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p, const Options& mopt) {
212 return by_row<Output_>(*p, mopt);
213}
214
215template<typename Output_ = double, typename Value_, typename Index_>
216std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>& mat) {
217 return by_row<Output_>(mat, Options());
218}
219
220template<typename Output_ = double, typename Value_, typename Index_>
221std::vector<Output_> by_row(const tatami::Matrix<Value_, Index_>* p) {
222 return by_row<Output_>(*p);
223}
228}
229
230}
231
232#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
void direct(const Value_ *ptr, Index_ num, const Group_ *group, std::size_t num_groups, const Index_ *group_size, Output_ *output_means, Output_ *output_variances, bool skip_nan, Index_ *valid_group_size)
Definition grouped_variances.hpp:107
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &mopt)
Definition medians.hpp:200
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const medians::Options &mopt)
Definition medians.hpp:99
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &mopt)
Definition medians.hpp:158
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
int parallelize(Function_ fun, const Index_ tasks, const int workers)
Value_ * copy_n(const Value_ *const input, const Size_ n, Value_ *const output)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Median calculation options.
Definition medians.hpp:32
int num_threads
Definition medians.hpp:43
bool skip_nan
Definition medians.hpp:37
Utilities for computing matrix statistics.