1#ifndef TATAMI_STATS_VARS_HPP
2#define TATAMI_STATS_VARS_HPP
14#include "sanisizer/sanisizer.hpp"
52template<
typename Output_ =
double,
typename Value_,
typename Index_ >
53void add_welford(Output_& mean, Output_& sumsq, Value_ value, Index_ count) {
54 Output_ delta = value - mean;
55 mean += delta / count;
56 sumsq += delta * (value - mean);
59template<
typename Output_ =
double,
typename Index_ >
60void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_ num_all) {
61 auto ratio =
static_cast<Output_
>(num_nonzero) /
static_cast<Output_
>(num_all);
62 sumsq += mean * mean * ratio * (num_all - num_nonzero);
67template<
typename Index_>
69 MockVector(std::size_t) {}
70 Index_& operator[](std::size_t) {
return out; }
71 std::size_t size() {
return 0; }
99template<
typename Output_ =
double,
typename Value_,
typename Index_ >
100std::pair<Output_, Output_>
direct(
const Value_* value, Index_ num_nonzero, Index_ num_all,
bool skip_nan) {
104 ::tatami_stats::internal::nanable_ifelse<Value_>(
108 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
110 if (std::isnan(val)) {
119 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
125 auto count = num_all - lost;
129 ::tatami_stats::internal::nanable_ifelse<Value_>(
132 for (Index_ i = 0; i < num_nonzero; ++i) {
134 if (!std::isnan(val)) {
135 auto delta =
static_cast<Output_
>(val) - mean;
136 var += delta * delta;
141 for (Index_ i = 0; i < num_nonzero; ++i) {
142 auto delta =
static_cast<Output_
>(value[i]) - mean;
143 var += delta * delta;
148 if (num_nonzero < num_all) {
149 var +=
static_cast<Output_
>(num_all - num_nonzero) * mean * mean;
153 return std::make_pair(std::numeric_limits<Output_>::quiet_NaN(), std::numeric_limits<Output_>::quiet_NaN());
154 }
else if (count == 1) {
155 return std::make_pair(mean, std::numeric_limits<Output_>::quiet_NaN());
157 return std::make_pair(mean, var / (count - 1));
178template<
typename Output_ =
double,
typename Value_,
typename Index_ >
179std::pair<Output_, Output_>
direct(
const Value_* ptr, Index_ num,
bool skip_nan) {
198template<
typename Output_,
typename Value_,
typename Index_>
209 RunningDense(Index_ num, Output_* mean, Output_* variance,
bool skip_nan) :
212 my_variance(variance),
213 my_skip_nan(skip_nan),
214 my_ok_count(skip_nan ?
tatami::can_cast_Index_to_container_size<I<decltype(my_ok_count)> >(num) : static_cast<Index_>(0))
221 void add(
const Value_* ptr) {
223 my_count = sanisizer::sum<Index_>(my_count, 1);
225 ::tatami_stats::internal::nanable_ifelse<Value_>(
228 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
230 if (!std::isnan(val)) {
231 internal::add_welford(my_mean[i], my_variance[i], val, ++(my_ok_count[i]));
236 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
237 internal::add_welford(my_mean[i], my_variance[i], *ptr, my_count);
247 ::tatami_stats::internal::nanable_ifelse<Value_>(
250 for (Index_ i = 0; i < my_num; ++i) {
251 auto ct = my_ok_count[i];
253 my_variance[i] = std::numeric_limits<Output_>::quiet_NaN();
255 my_mean[i] = std::numeric_limits<Output_>::quiet_NaN();
258 my_variance[i] /= ct - 1;
264 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
266 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
269 for (Index_ i = 0; i < my_num; ++i) {
270 my_variance[i] /= my_count - 1;
280 Output_* my_variance;
283 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_ok_count;
297template<
typename Output_,
typename Value_,
typename Index_>
311 RunningSparse(Index_ num, Output_* mean, Output_* variance,
bool skip_nan, Index_ subtract = 0) :
314 my_variance(variance),
315 my_nonzero(
tatami::can_cast_Index_to_container_size<I<decltype(my_nonzero)> >(num)),
316 my_skip_nan(skip_nan),
317 my_subtract(subtract),
318 my_nan(skip_nan ?
tatami::can_cast_Index_to_container_size<I<decltype(my_nan)> >(num) : static_cast<Index_>(0))
327 void add(
const Value_* value,
const Index_* index, Index_ number) {
329 my_count = sanisizer::sum<Index_>(my_count, 1);
331 ::tatami_stats::internal::nanable_ifelse<Value_>(
334 for (Index_ i = 0; i < number; ++i) {
336 auto ri = index[i] - my_subtract;
337 if (std::isnan(val)) {
340 internal::add_welford(my_mean[ri], my_variance[ri], val, ++(my_nonzero[ri]));
345 for (Index_ i = 0; i < number; ++i) {
346 auto ri = index[i] - my_subtract;
347 internal::add_welford(my_mean[ri], my_variance[ri], value[i], ++(my_nonzero[ri]));
357 ::tatami_stats::internal::nanable_ifelse<Value_>(
360 for (Index_ i = 0; i < my_num; ++i) {
361 auto& curM = my_mean[i];
362 auto& curV = my_variance[i];
363 Index_ ct = my_count - my_nan[i];
366 curV = std::numeric_limits<Output_>::quiet_NaN();
368 curM = std::numeric_limits<Output_>::quiet_NaN();
371 internal::add_welford_zeros(curM, curV, my_nonzero[i], ct);
378 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
380 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
383 for (Index_ i = 0; i < my_num; ++i) {
384 auto& var = my_variance[i];
385 internal::add_welford_zeros(my_mean[i], var, my_nonzero[i], my_count);
396 Output_* my_variance;
397 std::vector<Index_> my_nonzero;
401 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_nan;
421template<
typename Value_,
typename Index_,
typename Output_>
423 auto dim = (row ? mat.
nrow() : mat.
ncol());
424 auto otherdim = (row ? mat.
ncol() : mat.
nrow());
434 for (Index_ x = 0; x < l; ++x) {
435 auto out = ext->fetch(vbuffer.data(), NULL);
450 for (Index_ x = 0; x < otherdim; ++x) {
451 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
452 runner.
add(out.value, out.index, out.number);
456 local_output.transfer();
465 for (Index_ x = 0; x < l; ++x) {
466 auto out = ext->fetch(buffer.data());
480 for (Index_ x = 0; x < otherdim; ++x) {
481 runner.
add(ext->fetch(buffer.data()));
485 local_output.transfer();
495template<
typename Value_,
typename Index_,
typename Output_>
497 apply(row, *p, output, vopt);
516template<
typename Output_ =
double,
typename Value_,
typename Index_>
519 apply(
false, mat, output.data(), vopt);
527template<
typename Output_ =
double,
typename Value_,
typename Index_>
532template<
typename Output_ =
double,
typename Value_,
typename Index_>
537template<
typename Output_ =
double,
typename Value_,
typename Index_>
558template<
typename Output_ =
double,
typename Value_,
typename Index_>
561 apply(
true, mat, output.data(), vopt);
569template<
typename Output_ =
double,
typename Value_,
typename Index_>
574template<
typename Output_ =
double,
typename Value_,
typename Index_>
579template<
typename Output_ =
double,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:93
Running variances from dense data.
Definition variances.hpp:199
void add(const Value_ *ptr)
Definition variances.hpp:221
void finish()
Definition variances.hpp:246
RunningDense(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan)
Definition variances.hpp:209
Running variances from sparse data.
Definition variances.hpp:298
void finish()
Definition variances.hpp:356
RunningSparse(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan, Index_ subtract=0)
Definition variances.hpp:311
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition variances.hpp:327
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:517
std::pair< Output_, Output_ > direct(const Value_ *value, Index_ num_nonzero, Index_ num_all, bool skip_nan)
Definition variances.hpp:100
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &vopt)
Definition variances.hpp:422
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:559
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, const Index_ tasks, const int threads)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
Variance calculation options.
Definition variances.hpp:33
int num_threads
Definition variances.hpp:44
bool skip_nan
Definition variances.hpp:38
Utilities for computing matrix statistics.