tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
grouped_medians.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_GROUPED_MEDIANS_HPP
2#define TATAMI_STATS_GROUPED_MEDIANS_HPP
3
4#include "utils.hpp"
5#include "medians.hpp"
6
7#include <vector>
8#include <algorithm>
9
10#include "tatami/tatami.hpp"
11#include "sanisizer/sanisizer.hpp"
12
19namespace tatami_stats {
20
25namespace grouped_medians {
26
/**
 * @brief Grouped median calculation options.
 */
struct Options {
    /**
     * Flag forwarded to `medians::direct()` when computing each group's median.
     * Presumably controls whether NaN values are ignored - confirm against `medians.hpp`.
     */
    bool skip_nan = false;

    /**
     * Number of threads passed to `tatami::parallelize()`.
     */
    int num_threads = 1;
};
43
66template<typename Value_, typename Index_, typename Group_, class GroupSizes_, typename Output_>
67void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const GroupSizes_& group_sizes, Output_** output, const Options& mopt) {
68 Index_ dim = (row ? mat.nrow() : mat.ncol());
69 Index_ otherdim = (row ? mat.ncol() : mat.nrow());
70
71 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
73
74 auto ngroups = group_sizes.size();
75 auto workspace = sanisizer::create<std::vector<std::vector<Value_> > >(ngroups);
76 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
77 workspace[g].reserve(group_sizes[g]);
78 }
79
80 if (mat.sparse()) {
82 opt.sparse_ordered_index = false;
83
84 auto ext = tatami::consecutive_extractor<true>(mat, row, start, len, opt);
86 for (Index_ i = 0; i < len; ++i) {
87 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
88 for (Index_ j = 0; j < range.number; ++j) {
89 workspace[group[range.index[j]]].push_back(range.value[j]);
90 }
91
92 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
93 auto& w = workspace[g];
94 output[g][i + start] = medians::direct<Output_, Value_, Index_>(w.data(), w.size(), group_sizes[g], mopt.skip_nan);
95 w.clear();
96 }
97 }
98
99 } else {
100 auto ext = tatami::consecutive_extractor<false>(mat, row, start, len);
101 for (Index_ i = 0; i < len; ++i) {
102 auto ptr = ext->fetch(xbuffer.data());
103 for (Index_ j = 0; j < otherdim; ++j) {
104 workspace[group[j]].push_back(ptr[j]);
105 }
106
107 for (I<decltype(ngroups)> g = 0; g < ngroups; ++g) {
108 auto& w = workspace[g];
109 output[g][i + start] = medians::direct<Output_, Value_, Index_>(w.data(), w.size(), mopt.skip_nan);
110 w.clear();
111 }
112 }
113 }
114 }, dim, mopt.num_threads);
115}
116
120// Back-compatibility.
121template<typename Value_, typename Index_, typename Group_, class GroupSizes_, typename Output_>
122void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* group, const GroupSizes_& group_sizes, Output_** output, const Options& mopt) {
123 apply(row, *p, group, group_sizes, output, mopt);
124}
146template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
147std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& mopt) {
148 auto mydim = mat.nrow();
149 auto group_sizes = tabulate_groups(group, mat.ncol());
150
151 auto output = sanisizer::create<std::vector<std::vector<Output_> > >(group_sizes.size());
152 std::vector<Output_*> ptrs;
153 ptrs.reserve(output.size());
154 for (auto& o : output) {
155 o.resize(mydim);
156 ptrs.push_back(o.data());
157 }
158
159 apply(true, mat, group, group_sizes, ptrs.data(), mopt);
160 return output;
161}
162
166template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
167std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& mopt) {
168 return by_row<Output_>(*p, group, mopt);
169}
170
171template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
172std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
173 return by_row<Output_>(mat, group, Options());
174}
175
176template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
177std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
178 return by_row<Output_>(*p, group);
179}
201template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
202std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group, const Options& mopt) {
203 auto mydim = mat.ncol();
204 auto group_sizes = tabulate_groups(group, mat.nrow());
205
206 auto output = sanisizer::create<std::vector<std::vector<Output_> > >(group_sizes.size());
207 std::vector<Output_*> ptrs;
208 ptrs.reserve(output.size());
209 for (auto& o : output) {
210 o.resize(mydim);
211 ptrs.push_back(o.data());
212 }
213
214 apply(false, mat, group, group_sizes, ptrs.data(), mopt);
215 return output;
216}
217
221// Back-compatibility.
222template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
223std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& mopt) {
224 return by_column<Output_>(*p, group, mopt);
225}
226
227template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
228std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Group_* group) {
229 return by_column<Output_>(mat, group, Options());
230}
231
232template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
233std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
234 return by_column<Output_>(*p, group);
235}
240}
241
242}
243
244#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Compute row and column medians from a tatami::Matrix.
std::vector< std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, const Options &mopt)
Definition grouped_medians.hpp:202
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, const GroupSizes_ &group_sizes, Output_ **output, const Options &mopt)
Definition grouped_medians.hpp:67
std::vector< std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Group_ *group, const Options &mopt)
Definition grouped_medians.hpp:147
Output_ direct(Value_ *ptr, Index_ num, bool skip_nan)
Definition medians.hpp:83
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
std::vector< Size_ > tabulate_groups(const Group_ *group, Size_ n)
Definition utils.hpp:64
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
Grouped median calculation options.
Definition grouped_medians.hpp:30
int num_threads
Definition grouped_medians.hpp:41
bool skip_nan
Definition grouped_medians.hpp:35
Utilities for computing matrix statistics.