tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
ranges.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_RANGES_HPP
2#define TATAMI_STATS_RANGES_HPP
3
#include "tatami/tatami.hpp"
#include "utils.hpp"

#include <vector>
#include <algorithm>
#include <type_traits>
#include <limits>
#include <cmath>
10
17namespace tatami_stats {
18
23namespace ranges {
24
28struct Options {
33 bool skip_nan = false;
34
39 int num_threads = 1;
40};
41
45namespace internal {
46
template<typename Value_>
constexpr Value_ choose_minimum_placeholder() {
    // Starting point for a running minimum: no non-NaN 'y' can exceed it,
    // so the first real comparison always replaces it.
    typedef std::numeric_limits<Value_> limits;
    if constexpr(limits::has_infinity) {
        return limits::infinity();
    }
    return limits::max();
}
56
template<typename Value_>
constexpr Value_ choose_maximum_placeholder() {
    // Starting point for a running maximum: no non-NaN 'y' can fall below it,
    // so the first real comparison always replaces it.
    typedef std::numeric_limits<Value_> limits;
    if constexpr(limits::has_infinity) {
        return -limits::infinity();
    }
    return limits::lowest();
}
66
67}
88template<typename Value_, typename Index_>
89Value_ direct(const Value_* ptr, Index_ num, bool minimum, bool skip_nan) {
90 return ::tatami_stats::internal::nanable_ifelse_with_value<Value_>(
91 skip_nan,
92
93 [&]() -> Value_ {
94 if (minimum) {
95 auto current = internal::choose_minimum_placeholder<Value_>();
96 for (Index_ i = 0; i < num; ++i) {
97 auto val = ptr[i];
98 if (val < current) { // no need to explicitly handle NaNs, as any comparison with NaNs is always false.
99 current = val;
100 }
101 }
102 return current;
103 } else {
104 auto current = internal::choose_maximum_placeholder<Value_>();
105 for (Index_ i = 0; i < num; ++i) {
106 auto val = ptr[i];
107 if (val > current) { // again, no need to explicitly handle NaNs, as any comparison with NaNs is always false.
108 current = val;
109 }
110 }
111 return current;
112 }
113 },
114
115 [&]() -> Value_ {
116 if (num) {
117 if (minimum) {
118 return *std::min_element(ptr, ptr + num);
119 } else {
120 return *std::max_element(ptr, ptr + num);
121 }
122 } else {
123 if (minimum) {
124 return internal::choose_minimum_placeholder<Value_>();
125 } else {
126 return internal::choose_maximum_placeholder<Value_>();
127 }
128 }
129 }
130 );
131}
132
151template<typename Value_, typename Index_>
152Value_ direct(const Value_* value, Index_ num_nonzero, Index_ num_all, bool minimum, bool skip_nan) {
153 if (num_nonzero) {
154 auto candidate = direct(value, num_nonzero, minimum, skip_nan);
155 if (num_nonzero < num_all) {
156 if (minimum) {
157 if (candidate > 0) {
158 candidate = 0;
159 }
160 } else {
161 if (candidate < 0) {
162 candidate = 0;
163 }
164 }
165 }
166 return candidate;
167
168 } else if (num_all) {
169 return 0;
170
171 } else {
172 if (minimum) {
173 return internal::choose_minimum_placeholder<Value_>();
174 } else {
175 return internal::choose_maximum_placeholder<Value_>();
176 }
177 }
178}
179
193template<typename Output_, typename Value_, typename Index_>
195public:
206 RunningDense(Index_ num, Output_* store_min, Output_* store_max, bool skip_nan) :
207 my_num(num), my_store_min(store_min), my_store_max(store_max), my_skip_nan(skip_nan) {}
208
213 void add(const Value_* ptr) {
214 if (my_init) {
215 my_init = false;
216 ::tatami_stats::internal::nanable_ifelse<Value_>(
217 my_skip_nan,
218
219 [&]() -> void {
220 if (my_store_min) {
221 for (Index_ i = 0; i < my_num; ++i) {
222 auto val = ptr[i];
223 if (std::isnan(val)) {
224 my_store_min[i] = internal::choose_minimum_placeholder<Value_>();
225 } else {
226 my_store_min[i] = val;
227 }
228 }
229 }
230 if (my_store_max) {
231 for (Index_ i = 0; i < my_num; ++i) {
232 auto val = ptr[i];
233 if (std::isnan(val)) {
234 my_store_max[i] = internal::choose_maximum_placeholder<Value_>();
235 } else {
236 my_store_max[i] = val;
237 }
238 }
239 }
240 },
241
242 [&]() -> void {
243 if (my_store_min) {
244 std::copy_n(ptr, my_num, my_store_min);
245 }
246 if (my_store_max) {
247 std::copy_n(ptr, my_num, my_store_max);
248 }
249 }
250 );
251
252 } else {
253 if (my_store_min) {
254 for (Index_ i = 0; i < my_num; ++i) {
255 auto val = ptr[i];
256 auto& current = my_store_min[i];
257 if (val < current) { // this should implicitly skip val=NaNs, as any NaN comparison will be false.
258 current = val;
259 }
260 }
261 }
262 if (my_store_max) {
263 for (Index_ i = 0; i < my_num; ++i) {
264 auto val = ptr[i];
265 auto& current = my_store_max[i];
266 if (val > current) { // this should implicitly skip val=NaNs, as any NaN comparison will be false.
267 current = val;
268 }
269 }
270 }
271 }
272 }
273
277 void finish() {
278 if (my_init) {
279 if (my_store_min) {
280 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
281 }
282 if (my_store_max) {
283 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
284 }
285 }
286 }
287
288private:
289 bool my_init = true;
290 Index_ my_num;
291 Output_* my_store_min;
292 Output_* my_store_max;
293 bool my_skip_nan;
294};
295
306template<typename Output_, typename Value_, typename Index_>
308public:
322 RunningSparse(Index_ num, Output_* store_min, Output_* store_max, bool skip_nan, Index_ subtract = 0) :
323 my_num(num), my_store_min(store_min), my_store_max(store_max), my_skip_nan(skip_nan), my_subtract(subtract) {}
324
331 void add(const Value_* value, const Index_* index, Index_ number) {
332 if (my_count == 0) {
333 my_nonzero.resize(my_num);
334
335 if (my_store_min) {
336 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
337 }
338 if (my_store_max) {
339 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
340 }
341
342 if (!my_skip_nan) {
343 for (Index_ i = 0; i < number; ++i) {
344 auto val = value[i];
345 auto idx = index[i] - my_subtract;
346 if (my_store_min) {
347 my_store_min[idx] = val;
348 }
349 if (my_store_max) {
350 my_store_max[idx] = val;
351 }
352 ++my_nonzero[idx];
353 }
354 my_count = 1;
355 return;
356 }
357 }
358
359 for (Index_ i = 0; i < number; ++i) {
360 auto val = value[i];
361 auto idx = index[i] - my_subtract;
362 if (my_store_min) { // this should implicitly skip NaNs, any NaN comparison will be false.
363 auto& current = my_store_min[idx];
364 if (current > val) {
365 current = val;
366 }
367 }
368 if (my_store_max) {
369 auto& current = my_store_max[idx];
370 if (current < val) {
371 current = val;
372 }
373 }
374 ++my_nonzero[idx];
375 }
376
377 ++my_count;
378 }
379
383 void finish() {
384 if (my_count) {
385 for (Index_ i = 0; i < my_num; ++i) {
386 if (my_count > my_nonzero[i]) {
387 if (my_store_min) {
388 auto& current = my_store_min[i];
389 if (current > 0) {
390 current = 0;
391 }
392 }
393 if (my_store_max) {
394 auto& current = my_store_max[i];
395 if (current < 0) {
396 current = 0;
397 }
398 }
399 }
400 }
401 } else {
402 if (my_store_min) {
403 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
404 }
405 if (my_store_max) {
406 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
407 }
408 }
409 }
410
411private:
412 Index_ my_num;
413 Output_* my_store_min;
414 Output_* my_store_max;
415 bool my_skip_nan;
416 Index_ my_subtract;
417 Index_ my_count = 0;
418 std::vector<Index_> my_nonzero;
419};
420
/**
 * Compute minima and/or maxima for each row or column of a `tatami::Matrix`.
 *
 * @tparam Value_ Type of the matrix value.
 * @tparam Index_ Type of the row/column indices.
 * @tparam Output_ Type of the output value.
 *
 * @param row Whether to compute the statistic per row; otherwise, per column.
 * @param mat Matrix to be processed.
 * @param[out] min_out Pointer to an array of length equal to the number of rows
 * (if `row = true`) or columns (otherwise), to be filled with the minima.
 * May be NULL, in which case the minima are not computed.
 * @param[out] max_out Like `min_out`, but filled with the maxima.
 * May be NULL, in which case the maxima are not computed.
 * @param ropt Range calculation options.
 */
template<typename Value_, typename Index_, typename Output_>
void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* min_out, Output_* max_out, const Options& ropt) {
    auto dim = (row ? mat.nrow() : mat.ncol());
    auto otherdim = (row ? mat.ncol() : mat.nrow());
    // Direct extraction is used when the matrix's preferred access dimension
    // matches the dimension of interest; otherwise, a running calculation is
    // performed across the other dimension.
    const bool direct = mat.prefer_rows() == row;

    bool store_min = min_out != NULL;
    bool store_max = max_out != NULL;

    if (mat.sparse()) {
        tatami::Options opt;
        opt.sparse_ordered_index = false; // min/max are order-independent.

        if (direct) {
            opt.sparse_extract_index = false; // only values are needed here.
            tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
                auto ext = tatami::consecutive_extractor<true>(mat, row, s, l, opt);
                std::vector<Value_> vbuffer(otherdim);
                for (Index_ x = 0; x < l; ++x) {
                    auto out = ext->fetch(vbuffer.data(), NULL);
                    if (store_min) {
                        min_out[x + s] = ranges::direct(out.value, out.number, otherdim, true, ropt.skip_nan);
                    }
                    if (store_max) {
                        max_out[x + s] = ranges::direct(out.value, out.number, otherdim, false, ropt.skip_nan);
                    }
                }
            }, dim, ropt.num_threads);

        } else {
            tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
                auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, s, l, opt);
                std::vector<Value_> vbuffer(l);
                std::vector<Index_> ibuffer(l);

                // Each thread accumulates into a local buffer covering its
                // [s, s + l) slice, transferred to the output at the end.
                auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
                auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
                ranges::RunningSparse<Output_, Value_, Index_> runner(l, local_min.data(), local_max.data(), ropt.skip_nan, s);

                for (Index_ x = 0; x < otherdim; ++x) {
                    auto out = ext->fetch(vbuffer.data(), ibuffer.data());
                    runner.add(out.value, out.index, out.number);
                }

                runner.finish();
                if (store_min) {
                    local_min.transfer();
                }
                if (store_max) {
                    local_max.transfer();
                }
            }, dim, ropt.num_threads);
        }

    } else {
        if (direct) {
            tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
                auto ext = tatami::consecutive_extractor<false>(mat, row, s, l);
                std::vector<Value_> buffer(otherdim);
                for (Index_ x = 0; x < l; ++x) {
                    auto ptr = ext->fetch(buffer.data());
                    if (store_min) {
                        min_out[x + s] = ranges::direct(ptr, otherdim, true, ropt.skip_nan);
                    }
                    if (store_max) {
                        max_out[x + s] = ranges::direct(ptr, otherdim, false, ropt.skip_nan);
                    }
                }
            }, dim, ropt.num_threads);

        } else {
            tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
                auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, s, l);
                std::vector<Value_> buffer(l);

                // Same per-thread buffering strategy as the sparse running path.
                auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
                auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
                ranges::RunningDense<Output_, Value_, Index_> runner(l, local_min.data(), local_max.data(), ropt.skip_nan);

                for (Index_ x = 0; x < otherdim; ++x) {
                    auto ptr = ext->fetch(buffer.data());
                    runner.add(ptr);
                }

                runner.finish();
                if (store_min) {
                    local_min.transfer();
                }
                if (store_max) {
                    local_max.transfer();
                }
            }, dim, ropt.num_threads);
        }
    }

    return;
}
536
540// Back-compatibility.
541template<typename Value_, typename Index_, typename Output_>
542void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* min_out, Output_* max_out, const Options& ropt) {
543 apply(row, *p, min_out, max_out, ropt);
544}
562template<typename Output_ = double, typename Value_, typename Index_>
563std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& ropt) {
564 std::vector<Output_> mins(mat.ncol()), maxs(mat.ncol());
565 apply(false, mat, mins.data(), maxs.data(), ropt);
566 return std::make_pair(std::move(mins), std::move(maxs));
567}
568
572// Back-compatibility.
573template<typename Output_ = double, typename Value_, typename Index_>
574std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
575 return by_column<Output_>(*p, ropt);
576}
577
578template<typename Output_ = double, typename Value_, typename Index_>
579std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat) {
580 return by_column<Output_>(mat, {});
581}
582
583template<typename Output_ = double, typename Value_, typename Index_>
584std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p) {
585 return by_column<Output_>(*p);
586}
604template<typename Output_ = double, typename Value_, typename Index_>
605std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& ropt) {
606 std::vector<Output_> mins(mat.nrow()), maxs(mat.nrow());
607 apply(true, mat, mins.data(), maxs.data(), ropt);
608 return std::make_pair(std::move(mins), std::move(maxs));
609}
610
614// Back-compatibility.
615template<typename Output_ = double, typename Value_, typename Index_>
616std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
617 return by_row<Output_>(*p, ropt);
618}
619
620template<typename Output_ = double, typename Value_, typename Index_>
621std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat) {
622 return by_row<Output_>(mat, {});
623}
624
625template<typename Output_ = double, typename Value_, typename Index_>
626std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p) {
627 return by_row<Output_>(*p);
628}
633}
634
635}
636
637#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:79
Running minima/maxima from dense data.
Definition ranges.hpp:194
void add(const Value_ *ptr)
Definition ranges.hpp:213
void finish()
Definition ranges.hpp:277
RunningDense(Index_ num, Output_ *store_min, Output_ *store_max, bool skip_nan)
Definition ranges.hpp:206
Running minima/maxima from sparse data.
Definition ranges.hpp:307
void finish()
Definition ranges.hpp:383
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition ranges.hpp:331
RunningSparse(Index_ num, Output_ *store_min, Output_ *store_max, bool skip_nan, Index_ subtract=0)
Definition ranges.hpp:322
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &ropt)
Definition ranges.hpp:605
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *min_out, Output_ *max_out, const Options &ropt)
Definition ranges.hpp:440
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &ropt)
Definition ranges.hpp:563
Value_ direct(const Value_ *ptr, Index_ num, bool minimum, bool skip_nan)
Definition ranges.hpp:89
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Range calculation options.
Definition ranges.hpp:28
bool skip_nan
Definition ranges.hpp:33
int num_threads
Definition ranges.hpp:39
Utilities for computing matrix statistics.