tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
grouped_medians.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_GROUPED_MEDIANS_HPP
2#define TATAMI_STATS_GROUPED_MEDIANS_HPP
3
4#include "utils.hpp"
5#include "medians.hpp"
6
7#include <vector>
8#include <algorithm>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
19namespace tatami_stats {
20
25namespace grouped_medians {
26
/**
 * @brief Grouped median calculation options.
 */
struct Options {
    /**
     * Forwarded unchanged as the `skip_nan` argument of `medians::direct()`
     * for every group.
     * NOTE(review): presumably `true` means NaNs are ignored when computing
     * each median - confirm against medians.hpp.
     */
    bool skip_nan = false;

    /**
     * Number of threads handed to `tatami::parallelize()`.
     */
    int num_threads = 1;
};
43
67template<typename Value_, typename Index_, typename Group_, class GroupSizes_, typename Output_>
68void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const GroupSizes_& group_sizes, Output_** output, const Options& mopt) {
69 Index_ dim = (row ? mat.nrow() : mat.ncol());
70 Index_ otherdim = (row ? mat.ncol() : mat.nrow());
71
72 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
74
75 auto ngroups = group_sizes.size();
76 auto workspace = sanisizer::create<std::vector<std::vector<Value_> > >(ngroups);
77 for (decltype(ngroups) g = 0; g < ngroups; ++g) {
78 workspace[g].reserve(group_sizes[g]);
79 }
80
81 if (mat.sparse()) {
83 opt.sparse_ordered_index = false;
84
85 auto ext = tatami::consecutive_extractor<true>(mat, row, start, len, opt);
87 for (Index_ i = 0; i < len; ++i) {
88 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
89 for (Index_ j = 0; j < range.number; ++j) {
90 workspace[group[range.index[j]]].push_back(range.value[j]);
91 }
92
93 for (decltype(ngroups) g = 0; g < ngroups; ++g) {
94 auto& w = workspace[g];
95 output[g][i + start] = medians::direct<Output_, Value_, Index_>(w.data(), w.size(), group_sizes[g], mopt.skip_nan);
96 w.clear();
97 }
98 }
99
100 } else {
101 auto ext = tatami::consecutive_extractor<false>(mat, row, start, len);
102 for (Index_ i = 0; i < len; ++i) {
103 auto ptr = ext->fetch(xbuffer.data());
104 for (Index_ j = 0; j < otherdim; ++j) {
105 workspace[group[j]].push_back(ptr[j]);
106 }
107
108 for (decltype(ngroups) g = 0; g < ngroups; ++g) {
109 auto& w = workspace[g];
110 output[g][i + start] = medians::direct<Output_, Value_, Index_>(w.data(), w.size(), mopt.skip_nan);
111 w.clear();
112 }
113 }
114 }
115 }, dim, mopt.num_threads);
116}
117
121// Back-compatibility.
122template<typename Value_, typename Index_, typename Group_, class GroupSizes_, typename Output_>
123void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* group, const GroupSizes_& group_sizes, Output_** output, const Options& mopt) {
124 apply(row, *p, group, group_sizes, output, mopt);
125}
147template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
148std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& mopt) {
149 auto mydim = mat.nrow();
150 auto group_sizes = tabulate_groups(group, mat.ncol());
151
152 auto output = sanisizer::create<std::vector<std::vector<Output_> > >(group_sizes.size());
153 std::vector<Output_*> ptrs;
154 ptrs.reserve(output.size());
155 for (auto& o : output) {
156 o.resize(mydim);
157 ptrs.push_back(o.data());
158 }
159
160 apply(true, mat, group, group_sizes, ptrs.data(), mopt);
161 return output;
162}
163
167template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
168std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& mopt) {
169 return by_row<Output_>(*p, group, mopt);
170}
171
172template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
173std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
174 return by_row<Output_>(mat, group, Options());
175}
176
177template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
178std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
179 return by_row<Output_>(*p, group);
180}
202template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
203std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& mopt) {
204 auto mydim = mat.ncol();
205 auto group_sizes = tabulate_groups(group, mat.nrow());
206
207 auto output = sanisizer::create<std::vector<std::vector<Output_> > >(group_sizes.size());
208 std::vector<Output_*> ptrs;
209 ptrs.reserve(output.size());
210 for (auto& o : output) {
211 o.resize(mydim);
212 ptrs.push_back(o.data());
213 }
214
215 apply(false, mat, group, group_sizes, ptrs.data(), mopt);
216 return output;
217}
218
222// Back-compatibility.
223template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
224std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& mopt) {
225 return by_column<Output_>(*p, group, mopt);
226}
227
228template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
229std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
230 return by_column<Output_>(mat, group, Options());
231}
232
233template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
234std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
235 return by_column<Output_>(*p, group);
236}
241}
242
243}
244
245#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Compute row and column medians from a tatami::Matrix.
std::vector< std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, const Options &mopt)
Definition grouped_medians.hpp:203
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, const GroupSizes_ &group_sizes, Output_ **output, const Options &mopt)
Definition grouped_medians.hpp:68
std::vector< std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, const Options &mopt)
Definition grouped_medians.hpp:148
Output_ direct(Value_ *ptr, Index_ num, bool skip_nan)
Definition medians.hpp:83
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
std::vector< Size_ > tabulate_groups(const Group_ *group, Size_ n)
Definition utils.hpp:53
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
Grouped median calculation options.
Definition grouped_medians.hpp:30
int num_threads
Definition grouped_medians.hpp:41
bool skip_nan
Definition grouped_medians.hpp:35
Utilities for computing matrix statistics.