1#ifndef TATAMI_STATS_VARS_HPP
2#define TATAMI_STATS_VARS_HPP
50template<
typename Output_ =
double,
typename Value_,
typename Index_ >
51void add_welford(Output_& mean, Output_& sumsq, Value_ value, Index_ count) {
52 Output_ delta = value - mean;
53 mean += delta / count;
54 sumsq += delta * (value - mean);
57template<
typename Output_ =
double,
typename Index_ >
58void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_ num_all) {
59 auto ratio =
static_cast<Output_
>(num_nonzero) /
static_cast<Output_
>(num_all);
60 sumsq += mean * mean * ratio * (num_all - num_nonzero);
65template<
typename Index_>
67 MockVector(std::size_t) {}
68 Index_& operator[](std::size_t) {
return out; }
95template<
typename Output_ =
double,
typename Value_,
typename Index_ >
96std::pair<Output_, Output_>
direct(
const Value_* value, Index_ num_nonzero, Index_ num_all,
bool skip_nan) {
100 ::tatami_stats::internal::nanable_ifelse<Value_>(
104 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
106 if (std::isnan(val)) {
115 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
121 auto count = num_all - lost;
125 ::tatami_stats::internal::nanable_ifelse<Value_>(
128 for (Index_ i = 0; i < num_nonzero; ++i) {
130 if (!std::isnan(val)) {
131 auto delta =
static_cast<Output_
>(val) - mean;
132 var += delta * delta;
137 for (Index_ i = 0; i < num_nonzero; ++i) {
138 auto delta =
static_cast<Output_
>(value[i]) - mean;
139 var += delta * delta;
144 if (num_nonzero < num_all) {
145 var +=
static_cast<Output_
>(num_all - num_nonzero) * mean * mean;
149 return std::make_pair(std::numeric_limits<Output_>::quiet_NaN(), std::numeric_limits<Output_>::quiet_NaN());
150 }
else if (count == 1) {
151 return std::make_pair(mean, std::numeric_limits<Output_>::quiet_NaN());
153 return std::make_pair(mean, var / (count - 1));
173template<
typename Output_ =
double,
typename Value_,
typename Index_ >
174std::pair<Output_, Output_>
direct(
const Value_* ptr, Index_ num,
bool skip_nan) {
192template<
typename Output_,
typename Value_,
typename Index_>
203 RunningDense(Index_ num, Output_* mean, Output_* variance,
bool skip_nan) :
204 my_num(num), my_mean(mean), my_variance(variance), my_skip_nan(skip_nan), my_ok_count(skip_nan ? num : 0) {}
210 void add(
const Value_* ptr) {
211 ::tatami_stats::internal::nanable_ifelse<Value_>(
214 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
216 if (!std::isnan(val)) {
217 internal::add_welford(my_mean[i], my_variance[i], val, ++(my_ok_count[i]));
223 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
224 internal::add_welford(my_mean[i], my_variance[i], *ptr, my_count);
234 ::tatami_stats::internal::nanable_ifelse<Value_>(
237 for (Index_ i = 0; i < my_num; ++i) {
238 auto ct = my_ok_count[i];
240 my_variance[i] = std::numeric_limits<Output_>::quiet_NaN();
242 my_mean[i] = std::numeric_limits<Output_>::quiet_NaN();
245 my_variance[i] /= ct - 1;
251 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
253 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
256 for (Index_ i = 0; i < my_num; ++i) {
257 my_variance[i] /= my_count - 1;
267 Output_* my_variance;
270 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_ok_count;
283template<
typename Output_,
typename Value_,
typename Index_>
297 RunningSparse(Index_ num, Output_* mean, Output_* variance,
bool skip_nan, Index_ subtract = 0) :
298 my_num(num), my_mean(mean), my_variance(variance), my_nonzero(num), my_skip_nan(skip_nan), my_subtract(subtract), my_nan(skip_nan ? num : 0) {}
306 void add(
const Value_* value,
const Index_* index, Index_ number) {
309 ::tatami_stats::internal::nanable_ifelse<Value_>(
312 for (Index_ i = 0; i < number; ++i) {
314 auto ri = index[i] - my_subtract;
315 if (std::isnan(val)) {
318 internal::add_welford(my_mean[ri], my_variance[ri], val, ++(my_nonzero[ri]));
323 for (Index_ i = 0; i < number; ++i) {
324 auto ri = index[i] - my_subtract;
325 internal::add_welford(my_mean[ri], my_variance[ri], value[i], ++(my_nonzero[ri]));
335 ::tatami_stats::internal::nanable_ifelse<Value_>(
338 for (Index_ i = 0; i < my_num; ++i) {
339 auto& curM = my_mean[i];
340 auto& curV = my_variance[i];
341 Index_ ct = my_count - my_nan[i];
344 curV = std::numeric_limits<Output_>::quiet_NaN();
346 curM = std::numeric_limits<Output_>::quiet_NaN();
349 internal::add_welford_zeros(curM, curV, my_nonzero[i], ct);
356 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
358 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
361 for (Index_ i = 0; i < my_num; ++i) {
362 auto& var = my_variance[i];
363 internal::add_welford_zeros(my_mean[i], var, my_nonzero[i], my_count);
374 Output_* my_variance;
375 std::vector<Index_> my_nonzero;
379 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_nan;
398template<
typename Value_,
typename Index_,
typename Output_>
400 auto dim = (row ? mat.
nrow() : mat.
ncol());
401 auto otherdim = (row ? mat.
ncol() : mat.
nrow());
410 std::vector<Value_> vbuffer(otherdim);
411 for (Index_ x = 0; x < l; ++x) {
412 auto out = ext->fetch(vbuffer.data(), NULL);
420 std::vector<Value_> vbuffer(l);
421 std::vector<Index_> ibuffer(l);
423 std::vector<Output_> running_means(l);
427 for (Index_ x = 0; x < otherdim; ++x) {
428 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
429 runner.
add(out.value, out.index, out.number);
441 std::vector<Value_> buffer(otherdim);
442 for (Index_ x = 0; x < l; ++x) {
443 auto out = ext->fetch(buffer.data());
451 std::vector<Value_> buffer(l);
453 std::vector<Output_> running_means(l);
457 for (Index_ x = 0; x < otherdim; ++x) {
458 runner.
add(ext->fetch(buffer.data()));
472template<
typename Value_,
typename Index_,
typename Output_>
474 apply(row, *p, output, vopt);
492template<
typename Output_ =
double,
typename Value_,
typename Index_>
494 std::vector<Output_> output(mat.
ncol());
495 apply(
false, mat, output.data(), vopt);
503template<
typename Output_ =
double,
typename Value_,
typename Index_>
508template<
typename Output_ =
double,
typename Value_,
typename Index_>
513template<
typename Output_ =
double,
typename Value_,
typename Index_>
533template<
typename Output_ =
double,
typename Value_,
typename Index_>
535 std::vector<Output_> output(mat.
nrow());
536 apply(
true, mat, output.data(), vopt);
544template<
typename Output_ =
double,
typename Value_,
typename Index_>
549template<
typename Output_ =
double,
typename Value_,
typename Index_>
554template<
typename Output_ =
double,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:79
void transfer()
Definition utils.hpp:136
Output_ * data()
Definition utils.hpp:119
Running variances from dense data.
Definition variances.hpp:193
void add(const Value_ *ptr)
Definition variances.hpp:210
void finish()
Definition variances.hpp:233
RunningDense(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan)
Definition variances.hpp:203
Running variances from sparse data.
Definition variances.hpp:284
void finish()
Definition variances.hpp:334
RunningSparse(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan, Index_ subtract=0)
Definition variances.hpp:297
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition variances.hpp:306
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:493
std::pair< Output_, Output_ > direct(const Value_ *value, Index_ num_nonzero, Index_ num_all, bool skip_nan)
Definition variances.hpp:96
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &vopt)
Definition variances.hpp:399
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:534
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
Variance calculation options.
Definition variances.hpp:31
int num_threads
Definition variances.hpp:42
bool skip_nan
Definition variances.hpp:36
Utilities for computing matrix statistics.