1#ifndef TATAMI_STATS_VARS_HPP
2#define TATAMI_STATS_VARS_HPP
14#include "sanisizer/sanisizer.hpp"
52template<
typename Output_ =
double,
typename Value_,
typename Index_ >
53void add_welford(Output_& mean, Output_& sumsq, Value_ value, Index_ count) {
54 Output_ delta = value - mean;
55 mean += delta / count;
56 sumsq += delta * (value - mean);
59template<
typename Output_ =
double,
typename Index_ >
60void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_ num_all) {
61 auto ratio =
static_cast<Output_
>(num_nonzero) /
static_cast<Output_
>(num_all);
62 sumsq += mean * mean * ratio * (num_all - num_nonzero);
67template<
typename Index_>
69 MockVector(std::size_t) {}
70 Index_& operator[](std::size_t) {
return out; }
71 std::size_t size() {
return 0; }
98template<
typename Output_ =
double,
typename Value_,
typename Index_ >
99std::pair<Output_, Output_>
direct(
const Value_* value, Index_ num_nonzero, Index_ num_all,
bool skip_nan) {
103 ::tatami_stats::internal::nanable_ifelse<Value_>(
107 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
109 if (std::isnan(val)) {
118 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
124 auto count = num_all - lost;
128 ::tatami_stats::internal::nanable_ifelse<Value_>(
131 for (Index_ i = 0; i < num_nonzero; ++i) {
133 if (!std::isnan(val)) {
134 auto delta =
static_cast<Output_
>(val) - mean;
135 var += delta * delta;
140 for (Index_ i = 0; i < num_nonzero; ++i) {
141 auto delta =
static_cast<Output_
>(value[i]) - mean;
142 var += delta * delta;
147 if (num_nonzero < num_all) {
148 var +=
static_cast<Output_
>(num_all - num_nonzero) * mean * mean;
152 return std::make_pair(std::numeric_limits<Output_>::quiet_NaN(), std::numeric_limits<Output_>::quiet_NaN());
153 }
else if (count == 1) {
154 return std::make_pair(mean, std::numeric_limits<Output_>::quiet_NaN());
156 return std::make_pair(mean, var / (count - 1));
176template<
typename Output_ =
double,
typename Value_,
typename Index_ >
177std::pair<Output_, Output_>
direct(
const Value_* ptr, Index_ num,
bool skip_nan) {
195template<
typename Output_,
typename Value_,
typename Index_>
206 RunningDense(Index_ num, Output_* mean, Output_* variance,
bool skip_nan) :
209 my_variance(variance),
210 my_skip_nan(skip_nan),
211 my_ok_count(skip_nan ?
tatami::can_cast_Index_to_container_size<decltype(my_ok_count)>(num) : static_cast<Index_>(0))
218 void add(
const Value_* ptr) {
220 my_count = sanisizer::sum<Index_>(my_count, 1);
222 ::tatami_stats::internal::nanable_ifelse<Value_>(
225 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
227 if (!std::isnan(val)) {
228 internal::add_welford(my_mean[i], my_variance[i], val, ++(my_ok_count[i]));
233 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
234 internal::add_welford(my_mean[i], my_variance[i], *ptr, my_count);
244 ::tatami_stats::internal::nanable_ifelse<Value_>(
247 for (Index_ i = 0; i < my_num; ++i) {
248 auto ct = my_ok_count[i];
250 my_variance[i] = std::numeric_limits<Output_>::quiet_NaN();
252 my_mean[i] = std::numeric_limits<Output_>::quiet_NaN();
255 my_variance[i] /= ct - 1;
261 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
263 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
266 for (Index_ i = 0; i < my_num; ++i) {
267 my_variance[i] /= my_count - 1;
277 Output_* my_variance;
280 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_ok_count;
293template<
typename Output_,
typename Value_,
typename Index_>
307 RunningSparse(Index_ num, Output_* mean, Output_* variance,
bool skip_nan, Index_ subtract = 0) :
310 my_variance(variance),
311 my_nonzero(
tatami::can_cast_Index_to_container_size<decltype(my_nonzero)>(num)),
312 my_skip_nan(skip_nan),
313 my_subtract(subtract),
314 my_nan(skip_nan ?
tatami::can_cast_Index_to_container_size<decltype(my_nan)>(num) : static_cast<Index_>(0))
323 void add(
const Value_* value,
const Index_* index, Index_ number) {
325 my_count = sanisizer::sum<Index_>(my_count, 1);
327 ::tatami_stats::internal::nanable_ifelse<Value_>(
330 for (Index_ i = 0; i < number; ++i) {
332 auto ri = index[i] - my_subtract;
333 if (std::isnan(val)) {
336 internal::add_welford(my_mean[ri], my_variance[ri], val, ++(my_nonzero[ri]));
341 for (Index_ i = 0; i < number; ++i) {
342 auto ri = index[i] - my_subtract;
343 internal::add_welford(my_mean[ri], my_variance[ri], value[i], ++(my_nonzero[ri]));
353 ::tatami_stats::internal::nanable_ifelse<Value_>(
356 for (Index_ i = 0; i < my_num; ++i) {
357 auto& curM = my_mean[i];
358 auto& curV = my_variance[i];
359 Index_ ct = my_count - my_nan[i];
362 curV = std::numeric_limits<Output_>::quiet_NaN();
364 curM = std::numeric_limits<Output_>::quiet_NaN();
367 internal::add_welford_zeros(curM, curV, my_nonzero[i], ct);
374 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
376 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
379 for (Index_ i = 0; i < my_num; ++i) {
380 auto& var = my_variance[i];
381 internal::add_welford_zeros(my_mean[i], var, my_nonzero[i], my_count);
392 Output_* my_variance;
393 std::vector<Index_> my_nonzero;
397 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_nan;
416template<
typename Value_,
typename Index_,
typename Output_>
418 auto dim = (row ? mat.
nrow() : mat.
ncol());
419 auto otherdim = (row ? mat.
ncol() : mat.
nrow());
429 for (Index_ x = 0; x < l; ++x) {
430 auto out = ext->fetch(vbuffer.data(), NULL);
445 for (Index_ x = 0; x < otherdim; ++x) {
446 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
447 runner.
add(out.value, out.index, out.number);
451 local_output.transfer();
460 for (Index_ x = 0; x < l; ++x) {
461 auto out = ext->fetch(buffer.data());
475 for (Index_ x = 0; x < otherdim; ++x) {
476 runner.
add(ext->fetch(buffer.data()));
480 local_output.transfer();
490template<
typename Value_,
typename Index_,
typename Output_>
492 apply(row, *p, output, vopt);
510template<
typename Output_ =
double,
typename Value_,
typename Index_>
513 apply(
false, mat, output.data(), vopt);
521template<
typename Output_ =
double,
typename Value_,
typename Index_>
526template<
typename Output_ =
double,
typename Value_,
typename Index_>
531template<
typename Output_ =
double,
typename Value_,
typename Index_>
551template<
typename Output_ =
double,
typename Value_,
typename Index_>
554 apply(
true, mat, output.data(), vopt);
562template<
typename Output_ =
double,
typename Value_,
typename Index_>
567template<
typename Output_ =
double,
typename Value_,
typename Index_>
572template<
typename Output_ =
double,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:82
Running variances from dense data.
Definition variances.hpp:196
void add(const Value_ *ptr)
Definition variances.hpp:218
void finish()
Definition variances.hpp:243
RunningDense(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan)
Definition variances.hpp:206
Running variances from sparse data.
Definition variances.hpp:294
void finish()
Definition variances.hpp:352
RunningSparse(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan, Index_ subtract=0)
Definition variances.hpp:307
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition variances.hpp:323
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:511
std::pair< Output_, Output_ > direct(const Value_ *value, Index_ num_nonzero, Index_ num_all, bool skip_nan)
Definition variances.hpp:99
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &vopt)
Definition variances.hpp:417
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:552
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
Variance calculation options.
Definition variances.hpp:33
int num_threads
Definition variances.hpp:44
bool skip_nan
Definition variances.hpp:38
Utilities for computing matrix statistics.