tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
ranges.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_RANGES_HPP
2#define TATAMI_STATS_RANGES_HPP
3
#include "utils.hpp"

#include <vector>
#include <algorithm>
#include <type_traits>
#include <limits>
#include <cmath>

#include "tatami/tatami.hpp"
11
18namespace tatami_stats {
19
24namespace ranges {
25
29struct Options {
34 bool skip_nan = false;
35
40 int num_threads = 1;
41};
42
46namespace internal {
47
48template<typename Value_>
49constexpr Value_ choose_minimum_placeholder() {
50 // Placeholder value 'x' is such that 'x >= y' is always true for any non-NaN 'y'.
51 if constexpr(std::numeric_limits<Value_>::has_infinity) {
52 return std::numeric_limits<Value_>::infinity();
53 } else {
54 return std::numeric_limits<Value_>::max();
55 }
56}
57
58template<typename Value_>
59constexpr Value_ choose_maximum_placeholder() {
60 // Placeholder value 'x' is such that 'x <= y' is always true for any non-NaN 'y'.
61 if constexpr(std::numeric_limits<Value_>::has_infinity) {
62 return -std::numeric_limits<Value_>::infinity();
63 } else {
64 return std::numeric_limits<Value_>::lowest();
65 }
66}
67
68}
89template<typename Value_, typename Index_>
90Value_ direct(const Value_* ptr, Index_ num, bool minimum, bool skip_nan) {
91 return ::tatami_stats::internal::nanable_ifelse_with_value<Value_>(
92 skip_nan,
93
94 [&]() -> Value_ {
95 if (minimum) {
96 auto current = internal::choose_minimum_placeholder<Value_>();
97 for (Index_ i = 0; i < num; ++i) {
98 auto val = ptr[i];
99 if (val < current) { // no need to explicitly handle NaNs, as any comparison with NaNs is always false.
100 current = val;
101 }
102 }
103 return current;
104 } else {
105 auto current = internal::choose_maximum_placeholder<Value_>();
106 for (Index_ i = 0; i < num; ++i) {
107 auto val = ptr[i];
108 if (val > current) { // again, no need to explicitly handle NaNs, as any comparison with NaNs is always false.
109 current = val;
110 }
111 }
112 return current;
113 }
114 },
115
116 [&]() -> Value_ {
117 if (num) {
118 if (minimum) {
119 return *std::min_element(ptr, ptr + num);
120 } else {
121 return *std::max_element(ptr, ptr + num);
122 }
123 } else {
124 if (minimum) {
125 return internal::choose_minimum_placeholder<Value_>();
126 } else {
127 return internal::choose_maximum_placeholder<Value_>();
128 }
129 }
130 }
131 );
132}
133
152template<typename Value_, typename Index_>
153Value_ direct(const Value_* value, Index_ num_nonzero, Index_ num_all, bool minimum, bool skip_nan) {
154 if (num_nonzero) {
155 auto candidate = direct(value, num_nonzero, minimum, skip_nan);
156 if (num_nonzero < num_all) {
157 if (minimum) {
158 if (candidate > 0) {
159 candidate = 0;
160 }
161 } else {
162 if (candidate < 0) {
163 candidate = 0;
164 }
165 }
166 }
167 return candidate;
168
169 } else if (num_all) {
170 return 0;
171
172 } else {
173 if (minimum) {
174 return internal::choose_minimum_placeholder<Value_>();
175 } else {
176 return internal::choose_maximum_placeholder<Value_>();
177 }
178 }
179}
180
195template<typename Output_, typename Value_, typename Index_>
197public:
208 RunningDense(Index_ num, Output_* store_min, Output_* store_max, bool skip_nan) :
209 my_num(num), my_store_min(store_min), my_store_max(store_max), my_skip_nan(skip_nan) {}
210
215 void add(const Value_* ptr) {
216 if (my_init) {
217 my_init = false;
218 ::tatami_stats::internal::nanable_ifelse<Value_>(
219 my_skip_nan,
220
221 [&]() -> void {
222 if (my_store_min) {
223 for (Index_ i = 0; i < my_num; ++i) {
224 auto val = ptr[i];
225 if (std::isnan(val)) {
226 my_store_min[i] = internal::choose_minimum_placeholder<Value_>();
227 } else {
228 my_store_min[i] = val;
229 }
230 }
231 }
232 if (my_store_max) {
233 for (Index_ i = 0; i < my_num; ++i) {
234 auto val = ptr[i];
235 if (std::isnan(val)) {
236 my_store_max[i] = internal::choose_maximum_placeholder<Value_>();
237 } else {
238 my_store_max[i] = val;
239 }
240 }
241 }
242 },
243
244 [&]() -> void {
245 if (my_store_min) {
246 std::copy_n(ptr, my_num, my_store_min);
247 }
248 if (my_store_max) {
249 std::copy_n(ptr, my_num, my_store_max);
250 }
251 }
252 );
253
254 } else {
255 if (my_store_min) {
256 for (Index_ i = 0; i < my_num; ++i) {
257 auto val = ptr[i];
258 auto& current = my_store_min[i];
259 if (val < current) { // this should implicitly skip val=NaNs, as any NaN comparison will be false.
260 current = val;
261 }
262 }
263 }
264 if (my_store_max) {
265 for (Index_ i = 0; i < my_num; ++i) {
266 auto val = ptr[i];
267 auto& current = my_store_max[i];
268 if (val > current) { // this should implicitly skip val=NaNs, as any NaN comparison will be false.
269 current = val;
270 }
271 }
272 }
273 }
274 }
275
279 void finish() {
280 if (my_init) {
281 if (my_store_min) {
282 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
283 }
284 if (my_store_max) {
285 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
286 }
287 }
288 }
289
290private:
291 bool my_init = true;
292 Index_ my_num;
293 Output_* my_store_min;
294 Output_* my_store_max;
295 bool my_skip_nan;
296};
297
309template<typename Output_, typename Value_, typename Index_>
311public:
325 RunningSparse(Index_ num, Output_* store_min, Output_* store_max, bool skip_nan, Index_ subtract = 0) :
326 my_num(num), my_store_min(store_min), my_store_max(store_max), my_skip_nan(skip_nan), my_subtract(subtract) {}
327
334 void add(const Value_* value, const Index_* index, Index_ number) {
335 if (my_count == 0) {
336 tatami::resize_container_to_Index_size(my_nonzero, my_num);
337
338 if (my_store_min) {
339 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
340 }
341 if (my_store_max) {
342 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
343 }
344
345 if (!my_skip_nan) {
346 for (Index_ i = 0; i < number; ++i) {
347 auto val = value[i];
348 auto idx = index[i] - my_subtract;
349 if (my_store_min) {
350 my_store_min[idx] = val;
351 }
352 if (my_store_max) {
353 my_store_max[idx] = val;
354 }
355 ++my_nonzero[idx];
356 }
357 my_count = 1;
358 return;
359 }
360 }
361
362 for (Index_ i = 0; i < number; ++i) {
363 auto val = value[i];
364 auto idx = index[i] - my_subtract;
365 if (my_store_min) { // this should implicitly skip NaNs, any NaN comparison will be false.
366 auto& current = my_store_min[idx];
367 if (current > val) {
368 current = val;
369 }
370 }
371 if (my_store_max) {
372 auto& current = my_store_max[idx];
373 if (current < val) {
374 current = val;
375 }
376 }
377 ++my_nonzero[idx];
378 }
379
380 ++my_count;
381 }
382
386 void finish() {
387 if (my_count) {
388 for (Index_ i = 0; i < my_num; ++i) {
389 if (my_count > my_nonzero[i]) {
390 if (my_store_min) {
391 auto& current = my_store_min[i];
392 if (current > 0) {
393 current = 0;
394 }
395 }
396 if (my_store_max) {
397 auto& current = my_store_max[i];
398 if (current < 0) {
399 current = 0;
400 }
401 }
402 }
403 }
404 } else {
405 if (my_store_min) {
406 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
407 }
408 if (my_store_max) {
409 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
410 }
411 }
412 }
413
414private:
415 Index_ my_num;
416 Output_* my_store_min;
417 Output_* my_store_max;
418 bool my_skip_nan;
419 Index_ my_subtract;
420 Index_ my_count = 0;
421 std::vector<Index_> my_nonzero;
422};
423
443template<typename Value_, typename Index_, typename Output_>
444void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* min_out, Output_* max_out, const Options& ropt) {
445 auto dim = (row ? mat.nrow() : mat.ncol());
446 auto otherdim = (row ? mat.ncol() : mat.nrow());
447 const bool direct = mat.prefer_rows() == row;
448
449 bool store_min = min_out != NULL;
450 bool store_max = max_out != NULL;
451
452 if (mat.sparse()) {
453 tatami::Options opt;
454 opt.sparse_ordered_index = false;
455
456 if (direct) {
457 opt.sparse_extract_index = false;
458 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
459 auto ext = tatami::consecutive_extractor<true>(mat, row, s, l, opt);
461 for (Index_ x = 0; x < l; ++x) {
462 auto out = ext->fetch(vbuffer.data(), NULL);
463 if (store_min) {
464 min_out[x + s] = ranges::direct(out.value, out.number, otherdim, true, ropt.skip_nan);
465 }
466 if (store_max) {
467 max_out[x + s] = ranges::direct(out.value, out.number, otherdim, false, ropt.skip_nan);
468 }
469 }
470 }, dim, ropt.num_threads);
471
472 } else {
473 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
474 auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, s, l, opt);
477
478 auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
479 auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
480 ranges::RunningSparse<Output_, Value_, Index_> runner(l, local_min.data(), local_max.data(), ropt.skip_nan, s);
481
482 for (Index_ x = 0; x < otherdim; ++x) {
483 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
484 runner.add(out.value, out.index, out.number);
485 }
486
487 runner.finish();
488 if (store_min) {
489 local_min.transfer();
490 }
491 if (store_max) {
492 local_max.transfer();
493 }
494 }, dim, ropt.num_threads);
495 }
496
497 } else {
498 if (direct) {
499 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
500 auto ext = tatami::consecutive_extractor<false>(mat, row, s, l);
502 for (Index_ x = 0; x < l; ++x) {
503 auto ptr = ext->fetch(buffer.data());
504 if (store_min) {
505 min_out[x + s] = ranges::direct(ptr, otherdim, true, ropt.skip_nan);
506 }
507 if (store_max) {
508 max_out[x + s] = ranges::direct(ptr, otherdim, false, ropt.skip_nan);
509 }
510 }
511 }, dim, ropt.num_threads);
512
513 } else {
514 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
515 auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, s, l);
517
518 auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
519 auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
520 ranges::RunningDense<Output_, Value_, Index_> runner(l, local_min.data(), local_max.data(), ropt.skip_nan);
521
522 for (Index_ x = 0; x < otherdim; ++x) {
523 auto ptr = ext->fetch(buffer.data());
524 runner.add(ptr);
525 }
526
527 runner.finish();
528 if (store_min) {
529 local_min.transfer();
530 }
531 if (store_max) {
532 local_max.transfer();
533 }
534 }, dim, ropt.num_threads);
535 }
536 }
537
538 return;
539}
540
544// Back-compatibility.
545template<typename Value_, typename Index_, typename Output_>
546void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* min_out, Output_* max_out, const Options& ropt) {
547 apply(row, *p, min_out, max_out, ropt);
548}
567template<typename Output_ = double, typename Value_, typename Index_>
568std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& ropt) {
570 auto maxs = mins;
571 apply(false, mat, mins.data(), maxs.data(), ropt);
572 return std::make_pair(std::move(mins), std::move(maxs));
573}
574
578// Back-compatibility.
579template<typename Output_ = double, typename Value_, typename Index_>
580std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
581 return by_column<Output_>(*p, ropt);
582}
583
584template<typename Output_ = double, typename Value_, typename Index_>
585std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat) {
586 return by_column<Output_>(mat, {});
587}
588
589template<typename Output_ = double, typename Value_, typename Index_>
590std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p) {
591 return by_column<Output_>(*p);
592}
611template<typename Output_ = double, typename Value_, typename Index_>
612std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& ropt) {
614 auto maxs = mins;
615 apply(true, mat, mins.data(), maxs.data(), ropt);
616 return std::make_pair(std::move(mins), std::move(maxs));
617}
618
622// Back-compatibility.
623template<typename Output_ = double, typename Value_, typename Index_>
624std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
625 return by_row<Output_>(*p, ropt);
626}
627
628template<typename Output_ = double, typename Value_, typename Index_>
629std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat) {
630 return by_row<Output_>(mat, {});
631}
632
633template<typename Output_ = double, typename Value_, typename Index_>
634std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p) {
635 return by_row<Output_>(*p);
636}
641}
642
643}
644
645#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:93
Running minima/maxima from dense data.
Definition ranges.hpp:196
void add(const Value_ *ptr)
Definition ranges.hpp:215
void finish()
Definition ranges.hpp:279
RunningDense(Index_ num, Output_ *store_min, Output_ *store_max, bool skip_nan)
Definition ranges.hpp:208
Running minima/maxima from sparse data.
Definition ranges.hpp:310
void finish()
Definition ranges.hpp:386
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition ranges.hpp:334
RunningSparse(Index_ num, Output_ *store_min, Output_ *store_max, bool skip_nan, Index_ subtract=0)
Definition ranges.hpp:325
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &ropt)
Definition ranges.hpp:612
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *min_out, Output_ *max_out, const Options &ropt)
Definition ranges.hpp:444
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &ropt)
Definition ranges.hpp:568
Value_ direct(const Value_ *ptr, Index_ num, bool minimum, bool skip_nan)
Definition ranges.hpp:90
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, const Index_ tasks, const int threads)
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, const bool row, const Index_ iter_start, const Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Range calculation options.
Definition ranges.hpp:29
bool skip_nan
Definition ranges.hpp:34
int num_threads
Definition ranges.hpp:40
Utilities for computing matrix statistics.