tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
ranges.hpp
Go to the documentation of this file.
#ifndef TATAMI_STATS_RANGES_HPP
#define TATAMI_STATS_RANGES_HPP

#include "utils.hpp"

#include <vector>
#include <algorithm>
#include <type_traits>
#include <limits>
#include <cmath>

#include "tatami/tatami.hpp"
11
18namespace tatami_stats {
19
24namespace ranges {
25
/**
 * @brief Range calculation options.
 */
struct Options {
    /**
     * Whether to ignore NaNs when computing the minimum/maximum.
     * If false, NaNs are assumed to be absent from the input.
     */
    bool skip_nan = false;

    /**
     * Number of threads to use when computing ranges across a `tatami::Matrix`.
     * See `tatami::parallelize()` for details on parallelization mechanics.
     */
    int num_threads = 1;
};
42
46namespace internal {
47
/**
 * Starting value for a running minimum: chosen so that 'placeholder >= y' holds
 * for every non-NaN 'y', i.e., the first real observation always replaces it.
 */
template<typename Value_>
constexpr Value_ choose_minimum_placeholder() {
    typedef std::numeric_limits<Value_> Limits;
    if constexpr(Limits::has_infinity) {
        return Limits::infinity();
    } else {
        return Limits::max();
    }
}
57
/**
 * Starting value for a running maximum: chosen so that 'placeholder <= y' holds
 * for every non-NaN 'y', i.e., the first real observation always replaces it.
 */
template<typename Value_>
constexpr Value_ choose_maximum_placeholder() {
    typedef std::numeric_limits<Value_> Limits;
    if constexpr(Limits::has_infinity) {
        return -Limits::infinity();
    } else {
        return Limits::lowest();
    }
}
67
68}
89template<typename Value_, typename Index_>
90Value_ direct(const Value_* ptr, Index_ num, bool minimum, bool skip_nan) {
91 return ::tatami_stats::internal::nanable_ifelse_with_value<Value_>(
92 skip_nan,
93
94 [&]() -> Value_ {
95 if (minimum) {
96 auto current = internal::choose_minimum_placeholder<Value_>();
97 for (Index_ i = 0; i < num; ++i) {
98 auto val = ptr[i];
99 if (val < current) { // no need to explicitly handle NaNs, as any comparison with NaNs is always false.
100 current = val;
101 }
102 }
103 return current;
104 } else {
105 auto current = internal::choose_maximum_placeholder<Value_>();
106 for (Index_ i = 0; i < num; ++i) {
107 auto val = ptr[i];
108 if (val > current) { // again, no need to explicitly handle NaNs, as any comparison with NaNs is always false.
109 current = val;
110 }
111 }
112 return current;
113 }
114 },
115
116 [&]() -> Value_ {
117 if (num) {
118 if (minimum) {
119 return *std::min_element(ptr, ptr + num);
120 } else {
121 return *std::max_element(ptr, ptr + num);
122 }
123 } else {
124 if (minimum) {
125 return internal::choose_minimum_placeholder<Value_>();
126 } else {
127 return internal::choose_maximum_placeholder<Value_>();
128 }
129 }
130 }
131 );
132}
133
152template<typename Value_, typename Index_>
153Value_ direct(const Value_* value, Index_ num_nonzero, Index_ num_all, bool minimum, bool skip_nan) {
154 if (num_nonzero) {
155 auto candidate = direct(value, num_nonzero, minimum, skip_nan);
156 if (num_nonzero < num_all) {
157 if (minimum) {
158 if (candidate > 0) {
159 candidate = 0;
160 }
161 } else {
162 if (candidate < 0) {
163 candidate = 0;
164 }
165 }
166 }
167 return candidate;
168
169 } else if (num_all) {
170 return 0;
171
172 } else {
173 if (minimum) {
174 return internal::choose_minimum_placeholder<Value_>();
175 } else {
176 return internal::choose_maximum_placeholder<Value_>();
177 }
178 }
179}
180
194template<typename Output_, typename Value_, typename Index_>
196public:
207 RunningDense(Index_ num, Output_* store_min, Output_* store_max, bool skip_nan) :
208 my_num(num), my_store_min(store_min), my_store_max(store_max), my_skip_nan(skip_nan) {}
209
214 void add(const Value_* ptr) {
215 if (my_init) {
216 my_init = false;
217 ::tatami_stats::internal::nanable_ifelse<Value_>(
218 my_skip_nan,
219
220 [&]() -> void {
221 if (my_store_min) {
222 for (Index_ i = 0; i < my_num; ++i) {
223 auto val = ptr[i];
224 if (std::isnan(val)) {
225 my_store_min[i] = internal::choose_minimum_placeholder<Value_>();
226 } else {
227 my_store_min[i] = val;
228 }
229 }
230 }
231 if (my_store_max) {
232 for (Index_ i = 0; i < my_num; ++i) {
233 auto val = ptr[i];
234 if (std::isnan(val)) {
235 my_store_max[i] = internal::choose_maximum_placeholder<Value_>();
236 } else {
237 my_store_max[i] = val;
238 }
239 }
240 }
241 },
242
243 [&]() -> void {
244 if (my_store_min) {
245 std::copy_n(ptr, my_num, my_store_min);
246 }
247 if (my_store_max) {
248 std::copy_n(ptr, my_num, my_store_max);
249 }
250 }
251 );
252
253 } else {
254 if (my_store_min) {
255 for (Index_ i = 0; i < my_num; ++i) {
256 auto val = ptr[i];
257 auto& current = my_store_min[i];
258 if (val < current) { // this should implicitly skip val=NaNs, as any NaN comparison will be false.
259 current = val;
260 }
261 }
262 }
263 if (my_store_max) {
264 for (Index_ i = 0; i < my_num; ++i) {
265 auto val = ptr[i];
266 auto& current = my_store_max[i];
267 if (val > current) { // this should implicitly skip val=NaNs, as any NaN comparison will be false.
268 current = val;
269 }
270 }
271 }
272 }
273 }
274
278 void finish() {
279 if (my_init) {
280 if (my_store_min) {
281 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
282 }
283 if (my_store_max) {
284 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
285 }
286 }
287 }
288
289private:
290 bool my_init = true;
291 Index_ my_num;
292 Output_* my_store_min;
293 Output_* my_store_max;
294 bool my_skip_nan;
295};
296
307template<typename Output_, typename Value_, typename Index_>
309public:
323 RunningSparse(Index_ num, Output_* store_min, Output_* store_max, bool skip_nan, Index_ subtract = 0) :
324 my_num(num), my_store_min(store_min), my_store_max(store_max), my_skip_nan(skip_nan), my_subtract(subtract) {}
325
332 void add(const Value_* value, const Index_* index, Index_ number) {
333 if (my_count == 0) {
334 tatami::resize_container_to_Index_size(my_nonzero, my_num);
335
336 if (my_store_min) {
337 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
338 }
339 if (my_store_max) {
340 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
341 }
342
343 if (!my_skip_nan) {
344 for (Index_ i = 0; i < number; ++i) {
345 auto val = value[i];
346 auto idx = index[i] - my_subtract;
347 if (my_store_min) {
348 my_store_min[idx] = val;
349 }
350 if (my_store_max) {
351 my_store_max[idx] = val;
352 }
353 ++my_nonzero[idx];
354 }
355 my_count = 1;
356 return;
357 }
358 }
359
360 for (Index_ i = 0; i < number; ++i) {
361 auto val = value[i];
362 auto idx = index[i] - my_subtract;
363 if (my_store_min) { // this should implicitly skip NaNs, any NaN comparison will be false.
364 auto& current = my_store_min[idx];
365 if (current > val) {
366 current = val;
367 }
368 }
369 if (my_store_max) {
370 auto& current = my_store_max[idx];
371 if (current < val) {
372 current = val;
373 }
374 }
375 ++my_nonzero[idx];
376 }
377
378 ++my_count;
379 }
380
384 void finish() {
385 if (my_count) {
386 for (Index_ i = 0; i < my_num; ++i) {
387 if (my_count > my_nonzero[i]) {
388 if (my_store_min) {
389 auto& current = my_store_min[i];
390 if (current > 0) {
391 current = 0;
392 }
393 }
394 if (my_store_max) {
395 auto& current = my_store_max[i];
396 if (current < 0) {
397 current = 0;
398 }
399 }
400 }
401 }
402 } else {
403 if (my_store_min) {
404 std::fill_n(my_store_min, my_num, internal::choose_minimum_placeholder<Value_>());
405 }
406 if (my_store_max) {
407 std::fill_n(my_store_max, my_num, internal::choose_maximum_placeholder<Value_>());
408 }
409 }
410 }
411
412private:
413 Index_ my_num;
414 Output_* my_store_min;
415 Output_* my_store_max;
416 bool my_skip_nan;
417 Index_ my_subtract;
418 Index_ my_count = 0;
419 std::vector<Index_> my_nonzero;
420};
421
440template<typename Value_, typename Index_, typename Output_>
441void apply(bool row, const tatami::Matrix<Value_, Index_>& mat, Output_* min_out, Output_* max_out, const Options& ropt) {
442 auto dim = (row ? mat.nrow() : mat.ncol());
443 auto otherdim = (row ? mat.ncol() : mat.nrow());
444 const bool direct = mat.prefer_rows() == row;
445
446 bool store_min = min_out != NULL;
447 bool store_max = max_out != NULL;
448
449 if (mat.sparse()) {
450 tatami::Options opt;
451 opt.sparse_ordered_index = false;
452
453 if (direct) {
454 opt.sparse_extract_index = false;
455 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
456 auto ext = tatami::consecutive_extractor<true>(mat, row, s, l, opt);
458 for (Index_ x = 0; x < l; ++x) {
459 auto out = ext->fetch(vbuffer.data(), NULL);
460 if (store_min) {
461 min_out[x + s] = ranges::direct(out.value, out.number, otherdim, true, ropt.skip_nan);
462 }
463 if (store_max) {
464 max_out[x + s] = ranges::direct(out.value, out.number, otherdim, false, ropt.skip_nan);
465 }
466 }
467 }, dim, ropt.num_threads);
468
469 } else {
470 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
471 auto ext = tatami::consecutive_extractor<true>(mat, !row, static_cast<Index_>(0), otherdim, s, l, opt);
474
475 auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
476 auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
477 ranges::RunningSparse<Output_, Value_, Index_> runner(l, local_min.data(), local_max.data(), ropt.skip_nan, s);
478
479 for (Index_ x = 0; x < otherdim; ++x) {
480 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
481 runner.add(out.value, out.index, out.number);
482 }
483
484 runner.finish();
485 if (store_min) {
486 local_min.transfer();
487 }
488 if (store_max) {
489 local_max.transfer();
490 }
491 }, dim, ropt.num_threads);
492 }
493
494 } else {
495 if (direct) {
496 tatami::parallelize([&](int, Index_ s, Index_ l) -> void {
497 auto ext = tatami::consecutive_extractor<false>(mat, row, s, l);
499 for (Index_ x = 0; x < l; ++x) {
500 auto ptr = ext->fetch(buffer.data());
501 if (store_min) {
502 min_out[x + s] = ranges::direct(ptr, otherdim, true, ropt.skip_nan);
503 }
504 if (store_max) {
505 max_out[x + s] = ranges::direct(ptr, otherdim, false, ropt.skip_nan);
506 }
507 }
508 }, dim, ropt.num_threads);
509
510 } else {
511 tatami::parallelize([&](int thread, Index_ s, Index_ l) -> void {
512 auto ext = tatami::consecutive_extractor<false>(mat, !row, static_cast<Index_>(0), otherdim, s, l);
514
515 auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
516 auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
517 ranges::RunningDense<Output_, Value_, Index_> runner(l, local_min.data(), local_max.data(), ropt.skip_nan);
518
519 for (Index_ x = 0; x < otherdim; ++x) {
520 auto ptr = ext->fetch(buffer.data());
521 runner.add(ptr);
522 }
523
524 runner.finish();
525 if (store_min) {
526 local_min.transfer();
527 }
528 if (store_max) {
529 local_max.transfer();
530 }
531 }, dim, ropt.num_threads);
532 }
533 }
534
535 return;
536}
537
541// Back-compatibility.
542template<typename Value_, typename Index_, typename Output_>
543void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* min_out, Output_* max_out, const Options& ropt) {
544 apply(row, *p, min_out, max_out, ropt);
545}
563template<typename Output_ = double, typename Value_, typename Index_>
564std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat, const Options& ropt) {
566 auto maxs = mins;
567 apply(false, mat, mins.data(), maxs.data(), ropt);
568 return std::make_pair(std::move(mins), std::move(maxs));
569}
570
574// Back-compatibility.
575template<typename Output_ = double, typename Value_, typename Index_>
576std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
577 return by_column<Output_>(*p, ropt);
578}
579
580template<typename Output_ = double, typename Value_, typename Index_>
581std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>& mat) {
582 return by_column<Output_>(mat, {});
583}
584
585template<typename Output_ = double, typename Value_, typename Index_>
586std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p) {
587 return by_column<Output_>(*p);
588}
606template<typename Output_ = double, typename Value_, typename Index_>
607std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat, const Options& ropt) {
609 auto maxs = mins;
610 apply(true, mat, mins.data(), maxs.data(), ropt);
611 return std::make_pair(std::move(mins), std::move(maxs));
612}
613
617// Back-compatibility.
618template<typename Output_ = double, typename Value_, typename Index_>
619std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
620 return by_row<Output_>(*p, ropt);
621}
622
623template<typename Output_ = double, typename Value_, typename Index_>
624std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>& mat) {
625 return by_row<Output_>(mat, {});
626}
627
628template<typename Output_ = double, typename Value_, typename Index_>
629std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p) {
630 return by_row<Output_>(*p);
631}
636}
637
638}
639
640#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:82
Running minima/maxima from dense data.
Definition ranges.hpp:195
void add(const Value_ *ptr)
Definition ranges.hpp:214
void finish()
Definition ranges.hpp:278
RunningDense(Index_ num, Output_ *store_min, Output_ *store_max, bool skip_nan)
Definition ranges.hpp:207
Running minima/maxima from sparse data.
Definition ranges.hpp:308
void finish()
Definition ranges.hpp:384
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition ranges.hpp:332
RunningSparse(Index_ num, Output_ *store_min, Output_ *store_max, bool skip_nan, Index_ subtract=0)
Definition ranges.hpp:323
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > &mat, const Options &ropt)
Definition ranges.hpp:607
void apply(bool row, const tatami::Matrix< Value_, Index_ > &mat, Output_ *min_out, Output_ *max_out, const Options &ropt)
Definition ranges.hpp:441
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > &mat, const Options &ropt)
Definition ranges.hpp:564
Value_ direct(const Value_ *ptr, Index_ num, bool minimum, bool skip_nan)
Definition ranges.hpp:90
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
void resize_container_to_Index_size(Container_ &container, Index_ x, Args_ &&... args)
Container_ create_container_of_Index_size(Index_ x, Args_ &&... args)
auto consecutive_extractor(const Matrix< Value_, Index_ > &matrix, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Range calculation options.
Definition ranges.hpp:29
bool skip_nan
Definition ranges.hpp:34
int num_threads
Definition ranges.hpp:40
Utilities for computing matrix statistics.