tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
grouped_medians.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_GROUPED_MEDIANS_HPP
2#define TATAMI_STATS_GROUPED_MEDIANS_HPP
3
4#include "utils.hpp"
5#include "tatami/tatami.hpp"
6#include "medians.hpp"
7#include <vector>
8#include <algorithm>
9
16namespace tatami_stats {
17
22namespace grouped_medians {
23
/**
 * @brief Grouped median calculation options.
 */
struct Options {
    /**
     * Passed as the `skip_nan` argument of every `medians::direct()` call
     * made while computing the per-group medians.
     */
    bool skip_nan = false;

    /**
     * Number of threads handed to `tatami::parallelize()`.
     */
    int num_threads = 1;
};
40
64template<typename Value_, typename Index_, typename Group_, class GroupSizes_, typename Output_>
65void apply(bool row, const tatami::Matrix<Value_, Index_>* p, const Group_* group, const GroupSizes_& group_sizes, Output_** output, const Options& mopt) {
66 Index_ dim = (row ? p->nrow() : p->ncol());
67 Index_ otherdim = (row ? p->ncol() : p->nrow());
68
69 tatami::parallelize([&](int, Index_ start, Index_ len) -> void {
70 std::vector<Value_> xbuffer(otherdim);
71
72 size_t ngroups = group_sizes.size();
73 std::vector<std::vector<double> > workspace(ngroups);
74 for (size_t g = 0; g < ngroups; ++g) {
75 workspace[g].reserve(group_sizes[g]);
76 }
77
78 if (p->sparse()) {
80 opt.sparse_ordered_index = false;
81
82 auto ext = tatami::consecutive_extractor<true>(p, row, start, len, opt);
83 std::vector<Index_> ibuffer(otherdim);
84 for (Index_ i = 0; i < len; ++i) {
85 auto range = ext->fetch(xbuffer.data(), ibuffer.data());
86 for (Index_ j = 0; j < range.number; ++j) {
87 workspace[group[range.index[j]]].push_back(range.value[j]);
88 }
89
90 for (size_t g = 0; g < ngroups; ++g) {
91 auto& w = workspace[g];
92 output[g][i + start] = medians::direct(w.data(), w.size(), static_cast<size_t>(group_sizes[g]), mopt.skip_nan);
93 w.clear();
94 }
95 }
96
97 } else {
98 auto ext = tatami::consecutive_extractor<false>(p, row, start, len);
99 for (Index_ i = 0; i < len; ++i) {
100 auto ptr = ext->fetch(xbuffer.data());
101 for (Index_ j = 0; j < otherdim; ++j) {
102 workspace[group[j]].push_back(ptr[j]);
103 }
104
105 for (size_t g = 0; g < ngroups; ++g) {
106 auto& w = workspace[g];
107 output[g][i + start] = medians::direct(w.data(), w.size(), mopt.skip_nan);
108 w.clear();
109 }
110 }
111 }
112 }, dim, mopt.num_threads);
113}
114
132template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
133std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& mopt) {
134 size_t mydim = p->nrow();
135 auto group_sizes = tabulate_groups(group, p->ncol());
136
137 std::vector<std::vector<Output_> > output(group_sizes.size());
138 std::vector<Output_*> ptrs;
139 ptrs.reserve(output.size());
140 for (auto& o : output) {
141 o.resize(mydim);
142 ptrs.push_back(o.data());
143 }
144
145 apply(true, p, group, group_sizes, ptrs.data(), mopt);
146 return output;
147}
148
165template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
166std::vector<std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
167 return by_row(p, group, Options());
168}
169
187template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
188std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group, const Options& mopt) {
189 size_t mydim = p->ncol();
190 auto group_sizes = tabulate_groups(group, p->nrow());
191
192 std::vector<std::vector<Output_> > output(group_sizes.size());
193 std::vector<Output_*> ptrs;
194 ptrs.reserve(output.size());
195 for (auto& o : output) {
196 o.resize(mydim);
197 ptrs.push_back(o.data());
198 }
199
200 apply(false, p, group, group_sizes, ptrs.data(), mopt);
201 return output;
202}
203
220template<typename Output_ = double, typename Value_, typename Index_, typename Group_>
221std::vector<std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Group_* group) {
222 return by_column(p, group, Options());
223}
224
225}
226
227}
228
229#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Compute row and column medians from a tatami::Matrix.
std::vector< std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > *p, const Group_ *group, const Options &mopt)
Definition grouped_medians.hpp:133
std::vector< std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > *p, const Group_ *group, const Options &mopt)
Definition grouped_medians.hpp:188
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, const Group_ *group, const GroupSizes_ &group_sizes, Output_ **output, const Options &mopt)
Definition grouped_medians.hpp:65
Output_ direct(Value_ *ptr, Index_ num, bool skip_nan)
Definition medians.hpp:82
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
std::vector< Size_ > tabulate_groups(const Group_ *group, Size_ n)
Definition utils.hpp:49
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_ordered_index
Grouped median calculation options.
Definition grouped_medians.hpp:27
int num_threads
Definition grouped_medians.hpp:38
bool skip_nan
Definition grouped_medians.hpp:32
Utilities for computing matrix statistics.