1#ifndef TATAMI_STATS_VARS_HPP
2#define TATAMI_STATS_VARS_HPP
49template<
typename Output_ =
double,
typename Value_,
typename Index_ >
50void add_welford(Output_& mean, Output_& sumsq, Value_ value, Index_ count) {
51 Output_ delta = value - mean;
52 mean += delta / count;
53 sumsq += delta * (value - mean);
56template<
typename Output_ =
double,
typename Index_ >
57void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_ num_all) {
58 auto ratio =
static_cast<Output_
>(num_nonzero) /
static_cast<Output_
>(num_all);
59 sumsq += mean * mean * ratio * (num_all - num_nonzero);
64template<
typename Index_>
67 Index_& operator[](
size_t) {
return out; }
94template<
typename Output_ =
double,
typename Value_,
typename Index_ >
95std::pair<Output_, Output_>
direct(
const Value_* value, Index_ num_nonzero, Index_ num_all,
bool skip_nan) {
99 ::tatami_stats::internal::nanable_ifelse<Value_>(
103 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
105 if (std::isnan(val)) {
114 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
120 auto count = num_all - lost;
124 ::tatami_stats::internal::nanable_ifelse<Value_>(
127 for (Index_ i = 0; i < num_nonzero; ++i) {
129 if (!std::isnan(val)) {
130 auto delta =
static_cast<Output_
>(val) - mean;
131 var += delta * delta;
136 for (Index_ i = 0; i < num_nonzero; ++i) {
137 auto delta =
static_cast<Output_
>(value[i]) - mean;
138 var += delta * delta;
143 if (num_nonzero < num_all) {
144 var +=
static_cast<Output_
>(num_all - num_nonzero) * mean * mean;
148 return std::make_pair(std::numeric_limits<Output_>::quiet_NaN(), std::numeric_limits<Output_>::quiet_NaN());
149 }
else if (count == 1) {
150 return std::make_pair(mean, std::numeric_limits<Output_>::quiet_NaN());
152 return std::make_pair(mean, var / (count - 1));
172template<
typename Output_ =
double,
typename Value_,
typename Index_ >
173std::pair<Output_, Output_>
direct(
const Value_* ptr, Index_ num,
bool skip_nan) {
174 return direct<Output_>(ptr, num, num, skip_nan);
191template<
typename Output_,
typename Value_,
typename Index_>
202 RunningDense(Index_ num, Output_* mean, Output_* variance,
bool skip_nan) :
203 my_num(num), my_mean(mean), my_variance(variance), my_skip_nan(skip_nan), my_ok_count(skip_nan ? num : 0) {}
209 void add(
const Value_* ptr) {
210 ::tatami_stats::internal::nanable_ifelse<Value_>(
213 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
215 if (!std::isnan(val)) {
216 internal::add_welford(my_mean[i], my_variance[i], val, ++(my_ok_count[i]));
222 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
223 internal::add_welford(my_mean[i], my_variance[i], *ptr, my_count);
233 ::tatami_stats::internal::nanable_ifelse<Value_>(
236 for (Index_ i = 0; i < my_num; ++i) {
237 auto ct = my_ok_count[i];
239 my_variance[i] = std::numeric_limits<Output_>::quiet_NaN();
241 my_mean[i] = std::numeric_limits<Output_>::quiet_NaN();
244 my_variance[i] /= ct - 1;
250 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
252 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
255 for (Index_ i = 0; i < my_num; ++i) {
256 my_variance[i] /= my_count - 1;
266 Output_* my_variance;
269 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_ok_count;
282template<
typename Output_,
typename Value_,
typename Index_>
296 RunningSparse(Index_ num, Output_* mean, Output_* variance,
bool skip_nan, Index_ subtract = 0) :
297 my_num(num), my_mean(mean), my_variance(variance), my_nonzero(num), my_skip_nan(skip_nan), my_subtract(subtract), my_nan(skip_nan ? num : 0) {}
305 void add(
const Value_* value,
const Index_* index, Index_ number) {
308 ::tatami_stats::internal::nanable_ifelse<Value_>(
311 for (Index_ i = 0; i < number; ++i) {
313 auto ri = index[i] - my_subtract;
314 if (std::isnan(val)) {
317 internal::add_welford(my_mean[ri], my_variance[ri], val, ++(my_nonzero[ri]));
322 for (Index_ i = 0; i < number; ++i) {
323 auto ri = index[i] - my_subtract;
324 internal::add_welford(my_mean[ri], my_variance[ri], value[i], ++(my_nonzero[ri]));
334 ::tatami_stats::internal::nanable_ifelse<Value_>(
337 for (Index_ i = 0; i < my_num; ++i) {
338 auto& curM = my_mean[i];
339 auto& curV = my_variance[i];
340 Index_ ct = my_count - my_nan[i];
343 curV = std::numeric_limits<Output_>::quiet_NaN();
345 curM = std::numeric_limits<Output_>::quiet_NaN();
348 internal::add_welford_zeros(curM, curV, my_nonzero[i], ct);
355 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
357 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
360 for (Index_ i = 0; i < my_num; ++i) {
361 auto& var = my_variance[i];
362 internal::add_welford_zeros(my_mean[i], var, my_nonzero[i], my_count);
373 Output_* my_variance;
374 std::vector<Index_> my_nonzero;
378 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_nan;
397template<
typename Value_,
typename Index_,
typename Output_>
399 auto dim = (row ? p->
nrow() : p->
ncol());
400 auto otherdim = (row ? p->
ncol() : p->
nrow());
409 std::vector<Value_> vbuffer(otherdim);
410 for (Index_ x = 0; x < l; ++x) {
411 auto out = ext->fetch(vbuffer.data(), NULL);
412 output[x + s] = variances::direct<Output_>(out.value, out.number, otherdim, vopt.
skip_nan).second;
419 std::vector<Value_> vbuffer(l);
420 std::vector<Index_> ibuffer(l);
422 std::vector<Output_> running_means(l);
426 for (Index_ x = 0; x < otherdim; ++x) {
427 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
428 runner.
add(out.value, out.index, out.number);
440 std::vector<Value_> buffer(otherdim);
441 for (Index_ x = 0; x < l; ++x) {
442 auto out = ext->fetch(buffer.data());
443 output[x + s] = variances::direct<Output_>(out, otherdim, vopt.
skip_nan).second;
450 std::vector<Value_> buffer(l);
452 std::vector<Output_> running_means(l);
456 for (Index_ x = 0; x < otherdim; ++x) {
457 runner.
add(ext->fetch(buffer.data()));
479template<
typename Output_ =
double,
typename Value_,
typename Index_>
481 std::vector<Output_> output(p->
ncol());
482 apply(
false, p, output.data(), vopt);
497template<
typename Output_ =
double,
typename Value_,
typename Index_>
515template<
typename Output_ =
double,
typename Value_,
typename Index_>
517 std::vector<Output_> output(p->
nrow());
518 apply(
true, p, output.data(), vopt);
533template<
typename Output_ =
double,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:78
void transfer()
Definition utils.hpp:135
Output_ * data()
Definition utils.hpp:118
Running variances from dense data.
Definition variances.hpp:192
void add(const Value_ *ptr)
Definition variances.hpp:209
void finish()
Definition variances.hpp:232
RunningDense(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan)
Definition variances.hpp:202
Running variances from sparse data.
Definition variances.hpp:283
void finish()
Definition variances.hpp:333
RunningSparse(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan, Index_ subtract=0)
Definition variances.hpp:296
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition variances.hpp:305
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &vopt)
Definition variances.hpp:516
std::pair< Output_, Output_ > direct(const Value_ *value, Index_ num_nonzero, Index_ num_all, bool skip_nan)
Definition variances.hpp:95
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *output, const Options &vopt)
Definition variances.hpp:398
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &vopt)
Definition variances.hpp:480
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
Variance calculation options.
Definition variances.hpp:30
int num_threads
Definition variances.hpp:41
bool skip_nan
Definition variances.hpp:35
Utilities for computing matrix statistics.