1#ifndef TATAMI_STATS_VARS_HPP
2#define TATAMI_STATS_VARS_HPP
51template<
typename Output_ =
double,
typename Value_,
typename Index_ >
52void add_welford(Output_& mean, Output_& sumsq, Value_ value, Index_ count) {
53 Output_ delta = value - mean;
54 mean += delta / count;
55 sumsq += delta * (value - mean);
58template<
typename Output_ =
double,
typename Index_ >
59void add_welford_zeros(Output_& mean, Output_& sumsq, Index_ num_nonzero, Index_ num_all) {
60 auto ratio =
static_cast<Output_
>(num_nonzero) /
static_cast<Output_
>(num_all);
61 sumsq += mean * mean * ratio * (num_all - num_nonzero);
66template<
typename Index_>
68 MockVector(std::size_t) {}
69 Index_& operator[](std::size_t) {
return out; }
70 std::size_t size() {
return 0; }
97template<
typename Output_ =
double,
typename Value_,
typename Index_ >
98std::pair<Output_, Output_>
direct(
const Value_* value, Index_ num_nonzero, Index_ num_all,
bool skip_nan) {
102 ::tatami_stats::internal::nanable_ifelse<Value_>(
106 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
108 if (std::isnan(val)) {
117 for (Index_ i = 0; i < num_nonzero; ++i, ++copy) {
123 auto count = num_all - lost;
127 ::tatami_stats::internal::nanable_ifelse<Value_>(
130 for (Index_ i = 0; i < num_nonzero; ++i) {
132 if (!std::isnan(val)) {
133 auto delta =
static_cast<Output_
>(val) - mean;
134 var += delta * delta;
139 for (Index_ i = 0; i < num_nonzero; ++i) {
140 auto delta =
static_cast<Output_
>(value[i]) - mean;
141 var += delta * delta;
146 if (num_nonzero < num_all) {
147 var +=
static_cast<Output_
>(num_all - num_nonzero) * mean * mean;
151 return std::make_pair(std::numeric_limits<Output_>::quiet_NaN(), std::numeric_limits<Output_>::quiet_NaN());
152 }
else if (count == 1) {
153 return std::make_pair(mean, std::numeric_limits<Output_>::quiet_NaN());
155 return std::make_pair(mean, var / (count - 1));
175template<
typename Output_ =
double,
typename Value_,
typename Index_ >
176std::pair<Output_, Output_>
direct(
const Value_* ptr, Index_ num,
bool skip_nan) {
194template<
typename Output_,
typename Value_,
typename Index_>
205 RunningDense(Index_ num, Output_* mean, Output_* variance,
bool skip_nan) :
208 my_variance(variance),
209 my_skip_nan(skip_nan),
210 my_ok_count(skip_nan ?
tatami::can_cast_Index_to_container_size<decltype(my_ok_count)>(num) : static_cast<Index_>(0))
217 void add(
const Value_* ptr) {
218 ::tatami_stats::internal::nanable_ifelse<Value_>(
221 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
223 if (!std::isnan(val)) {
224 internal::add_welford(my_mean[i], my_variance[i], val, ++(my_ok_count[i]));
230 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
231 internal::add_welford(my_mean[i], my_variance[i], *ptr, my_count);
241 ::tatami_stats::internal::nanable_ifelse<Value_>(
244 for (Index_ i = 0; i < my_num; ++i) {
245 auto ct = my_ok_count[i];
247 my_variance[i] = std::numeric_limits<Output_>::quiet_NaN();
249 my_mean[i] = std::numeric_limits<Output_>::quiet_NaN();
252 my_variance[i] /= ct - 1;
258 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
260 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
263 for (Index_ i = 0; i < my_num; ++i) {
264 my_variance[i] /= my_count - 1;
274 Output_* my_variance;
277 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_ok_count;
290template<
typename Output_,
typename Value_,
typename Index_>
304 RunningSparse(Index_ num, Output_* mean, Output_* variance,
bool skip_nan, Index_ subtract = 0) :
307 my_variance(variance),
308 my_nonzero(
tatami::can_cast_Index_to_container_size<decltype(my_nonzero)>(num)),
309 my_skip_nan(skip_nan),
310 my_subtract(subtract),
311 my_nan(skip_nan ?
tatami::can_cast_Index_to_container_size<decltype(my_nan)>(num) : static_cast<Index_>(0))
320 void add(
const Value_* value,
const Index_* index, Index_ number) {
323 ::tatami_stats::internal::nanable_ifelse<Value_>(
326 for (Index_ i = 0; i < number; ++i) {
328 auto ri = index[i] - my_subtract;
329 if (std::isnan(val)) {
332 internal::add_welford(my_mean[ri], my_variance[ri], val, ++(my_nonzero[ri]));
337 for (Index_ i = 0; i < number; ++i) {
338 auto ri = index[i] - my_subtract;
339 internal::add_welford(my_mean[ri], my_variance[ri], value[i], ++(my_nonzero[ri]));
349 ::tatami_stats::internal::nanable_ifelse<Value_>(
352 for (Index_ i = 0; i < my_num; ++i) {
353 auto& curM = my_mean[i];
354 auto& curV = my_variance[i];
355 Index_ ct = my_count - my_nan[i];
358 curV = std::numeric_limits<Output_>::quiet_NaN();
360 curM = std::numeric_limits<Output_>::quiet_NaN();
363 internal::add_welford_zeros(curM, curV, my_nonzero[i], ct);
370 std::fill_n(my_variance, my_num, std::numeric_limits<Output_>::quiet_NaN());
372 std::fill_n(my_mean, my_num, std::numeric_limits<Output_>::quiet_NaN());
375 for (Index_ i = 0; i < my_num; ++i) {
376 auto& var = my_variance[i];
377 internal::add_welford_zeros(my_mean[i], var, my_nonzero[i], my_count);
388 Output_* my_variance;
389 std::vector<Index_> my_nonzero;
393 typename std::conditional<std::numeric_limits<Value_>::has_quiet_NaN, std::vector<Index_>, internal::MockVector<Index_> >::type my_nan;
412template<
typename Value_,
typename Index_,
typename Output_>
414 auto dim = (row ? mat.
nrow() : mat.
ncol());
415 auto otherdim = (row ? mat.
ncol() : mat.
nrow());
425 for (Index_ x = 0; x < l; ++x) {
426 auto out = ext->fetch(vbuffer.data(), NULL);
441 for (Index_ x = 0; x < otherdim; ++x) {
442 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
443 runner.
add(out.value, out.index, out.number);
447 local_output.transfer();
456 for (Index_ x = 0; x < l; ++x) {
457 auto out = ext->fetch(buffer.data());
471 for (Index_ x = 0; x < otherdim; ++x) {
472 runner.
add(ext->fetch(buffer.data()));
476 local_output.transfer();
486template<
typename Value_,
typename Index_,
typename Output_>
488 apply(row, *p, output, vopt);
506template<
typename Output_ =
double,
typename Value_,
typename Index_>
509 apply(
false, mat, output.data(), vopt);
517template<
typename Output_ =
double,
typename Value_,
typename Index_>
522template<
typename Output_ =
double,
typename Value_,
typename Index_>
527template<
typename Output_ =
double,
typename Value_,
typename Index_>
547template<
typename Output_ =
double,
typename Value_,
typename Index_>
550 apply(
true, mat, output.data(), vopt);
558template<
typename Output_ =
double,
typename Value_,
typename Index_>
563template<
typename Output_ =
double,
typename Value_,
typename Index_>
568template<
typename Output_ =
double,
typename Value_,
typename Index_>
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:82
Running variances from dense data.
Definition variances.hpp:195
void add(const Value_ *ptr)
Definition variances.hpp:217
void finish()
Definition variances.hpp:240
RunningDense(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan)
Definition variances.hpp:205
Running variances from sparse data.
Definition variances.hpp:291
void finish()
Definition variances.hpp:348
RunningSparse(Index_ num, Output_ *mean, Output_ *variance, bool skip_nan, Index_ subtract=0)
Definition variances.hpp:304
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition variances.hpp:320
std::vector< Output_ > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:507
std::pair< Output_, Output_ > direct(const Value_ *value, Index_ num_nonzero, Index_ num_all, bool skip_nan)
Definition variances.hpp:98
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *output, const Options &vopt)
Definition variances.hpp:413
std::vector< Output_ > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &vopt)
Definition variances.hpp:548
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
Variance calculation options.
Definition variances.hpp:32
int num_threads
Definition variances.hpp:43
bool skip_nan
Definition variances.hpp:37
Utilities for computing matrix statistics.