tatami_stats
Matrix statistics for tatami
Loading...
Searching...
No Matches
ranges.hpp
Go to the documentation of this file.
1#ifndef TATAMI_STATS_RANGES_HPP
2#define TATAMI_STATS_RANGES_HPP
3
4#include "tatami/tatami.hpp"
5#include "utils.hpp"
6
7#include <vector>
8#include <algorithm>
9#include <type_traits>
10
17namespace tatami_stats {
18
23namespace ranges {
24
/**
 * @brief Range calculation options.
 */
struct Options {
    /**
     * Whether to check for NaNs in the input and skip them.
     * If false, NaNs are assumed to be absent from the data.
     */
    bool skip_nan = false;

    /**
     * Number of threads to use when computing ranges across a matrix.
     */
    int num_threads = 1;
};
41
45namespace internal {
46
47template<bool minimum_, typename Value_>
48constexpr auto choose_placeholder() {
49 if constexpr(minimum_) {
50 // Placeholder value 'x' is such that 'x > y' is always true for any non-NaN 'y'.
51 if constexpr(std::numeric_limits<Value_>::has_infinity) {
52 return std::numeric_limits<Value_>::infinity();
53 } else {
54 return std::numeric_limits<Value_>::max();
55 }
56 } else {
57 // Placeholder value 'x' is such that 'x < y' is always true for any non-NaN 'y'.
58 if constexpr(std::numeric_limits<Value_>::has_infinity) {
59 return -std::numeric_limits<Value_>::infinity();
60 } else {
61 return std::numeric_limits<Value_>::lowest();
62 }
63 }
64}
65
// Whether 'alt' improves on the current 'best': strictly smaller when hunting
// for minima, strictly larger when hunting for maxima.
template<bool minimum_, typename Output_, typename Value_>
bool is_better(Output_ best, Value_ alt) {
    const auto candidate = static_cast<Output_>(alt);
    if constexpr(minimum_) {
        return best > candidate;
    }
    return best < candidate;
}
74
75}
96template<bool minimum_, typename Value_, typename Index_>
97Value_ direct(const Value_* ptr, Index_ num, bool skip_nan) {
98 return ::tatami_stats::internal::nanable_ifelse_with_value<Value_>(
99 skip_nan,
100 [&]() -> Value_ {
101 auto current = internal::choose_placeholder<minimum_, Value_>();
102 for (Index_ i = 0; i < num; ++i) {
103 auto val = ptr[i];
104 if (internal::is_better<minimum_>(current, val)) { // no need to explicitly handle NaNs, as any comparison with NaNs is always false.
105 current = val;
106 }
107 }
108 return current;
109 },
110 [&]() -> Value_ {
111 if (num) {
112 if constexpr(minimum_) {
113 return *std::min_element(ptr, ptr + num);
114 } else {
115 return *std::max_element(ptr, ptr + num);
116 }
117 } else {
118 return internal::choose_placeholder<minimum_, Value_>();
119 }
120 }
121 );
122}
123
142template<bool minimum_, typename Value_, typename Index_>
143Value_ direct(const Value_* value, Index_ num_nonzero, Index_ num_all, bool skip_nan) {
144 if (num_nonzero) {
145 auto candidate = direct<minimum_>(value, num_nonzero, skip_nan);
146 if (num_nonzero < num_all && internal::is_better<minimum_>(candidate, 0)) {
147 candidate = 0;
148 }
149 return candidate;
150 } else if (num_all) {
151 return 0;
152 } else {
153 return internal::choose_placeholder<minimum_, Value_>();
154 }
155}
156
172template<bool minimum_, typename Output_, typename Value_, typename Index_>
174public:
181 RunningDense(Index_ num, Output_* store, bool skip_nan) : my_num(num), my_store(store), my_skip_nan(skip_nan) {}
182
187 void add(const Value_* ptr) {
188 if (my_init) {
189 my_init = false;
190 ::tatami_stats::internal::nanable_ifelse<Value_>(
191 my_skip_nan,
192 [&]() {
193 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
194 auto val = *ptr;
195 if (std::isnan(val)) {
196 my_store[i] = internal::choose_placeholder<minimum_, Value_>();
197 } else {
198 my_store[i] = val;
199 }
200 }
201 },
202 [&]() {
203 std::copy_n(ptr, my_num, my_store);
204 }
205 );
206
207 } else {
208 for (Index_ i = 0; i < my_num; ++i, ++ptr) {
209 auto val = *ptr;
210 if (internal::is_better<minimum_>(my_store[i], val)) { // this should implicitly skip NaNs, any NaN comparison will be false.
211 my_store[i] = val;
212 }
213 }
214 }
215 }
216
220 void finish() {
221 if (my_init) {
222 std::fill_n(my_store, my_num, internal::choose_placeholder<minimum_, Value_>());
223 }
224 }
225
226private:
227 bool my_init = true;
228 Index_ my_num;
229 Output_* my_store;
230 bool my_skip_nan;
231};
232
245template<bool minimum_, typename Output_, typename Value_, typename Index_>
247public:
257 RunningSparse(Index_ num, Output_* store, bool skip_nan, Index_ subtract = 0) :
258 my_num(num), my_store(store), my_skip_nan(skip_nan), my_subtract(subtract) {}
259
266 void add(const Value_* value, const Index_* index, Index_ number) {
267 if (my_count == 0) {
268 my_nonzero.resize(my_num);
269 std::fill_n(my_store, my_num, internal::choose_placeholder<minimum_, Value_>());
270
271 if (!my_skip_nan) {
272 for (Index_ i = 0; i < number; ++i, ++value, ++index) {
273 auto val = *value;
274 auto idx = *index - my_subtract;
275 my_store[idx] = val;
276 ++my_nonzero[idx];
277 }
278 my_count = 1;
279 return;
280 }
281 }
282
283 for (Index_ i = 0; i < number; ++i, ++value, ++index) {
284 auto val = *value;
285 auto idx = *index - my_subtract;
286 auto& current = my_store[idx];
287 if (internal::is_better<minimum_>(current, val)) { // this should implicitly skip NaNs, any NaN comparison will be false.
288 current = val;
289 }
290 ++my_nonzero[idx];
291 }
292
293 ++my_count;
294 }
295
299 void finish() {
300 if (my_count) {
301 for (Index_ i = 0; i < my_num; ++i) {
302 if (my_count > my_nonzero[i]) {
303 auto& current = my_store[i];
304 if (internal::is_better<minimum_>(current, 0)) {
305 current = 0;
306 }
307 }
308 }
309 } else {
310 std::fill_n(my_store, my_num, internal::choose_placeholder<minimum_, Value_>());
311 }
312 }
313
314private:
315 Index_ my_num;
316 Output_* my_store;
317 bool my_skip_nan;
318 Index_ my_subtract;
319 Index_ my_count = 0;
320 std::vector<Index_> my_nonzero;
321};
322
341template<typename Value_, typename Index_, typename Output_>
342void apply(bool row, const tatami::Matrix<Value_, Index_>* p, Output_* min_out, Output_* max_out, const Options& ropt) {
343 auto dim = (row ? p->nrow() : p->ncol());
344 auto otherdim = (row ? p->ncol() : p->nrow());
345 const bool direct = p->prefer_rows() == row;
346
347 bool store_min = min_out != NULL;
348 bool store_max = max_out != NULL;
349
350 if (p->sparse()) {
351 tatami::Options opt;
352 opt.sparse_ordered_index = false;
353
354 if (direct) {
355 opt.sparse_extract_index = false;
356 tatami::parallelize([&](int, Index_ s, Index_ l) {
357 auto ext = tatami::consecutive_extractor<true>(p, row, s, l, opt);
358 std::vector<Value_> vbuffer(otherdim);
359 for (Index_ x = 0; x < l; ++x) {
360 auto out = ext->fetch(vbuffer.data(), NULL);
361 if (store_min) {
362 min_out[x + s] = ranges::direct<true>(out.value, out.number, otherdim, ropt.skip_nan);
363 }
364 if (store_max) {
365 max_out[x + s] = ranges::direct<false>(out.value, out.number, otherdim, ropt.skip_nan);
366 }
367 }
368 }, dim, ropt.num_threads);
369
370 } else {
371 tatami::parallelize([&](int thread, Index_ s, Index_ l) {
372 auto ext = tatami::consecutive_extractor<true>(p, !row, static_cast<Index_>(0), otherdim, s, l, opt);
373 std::vector<Value_> vbuffer(l);
374 std::vector<Index_> ibuffer(l);
375
376 auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
377 auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
378 ranges::RunningSparse<true, Output_, Value_, Index_> runmin(l, local_min.data(), ropt.skip_nan, s);
379 ranges::RunningSparse<false, Output_, Value_, Index_> runmax(l, local_max.data(), ropt.skip_nan, s);
380
381 for (Index_ x = 0; x < otherdim; ++x) {
382 auto out = ext->fetch(vbuffer.data(), ibuffer.data());
383 if (store_min) {
384 runmin.add(out.value, out.index, out.number);
385 }
386 if (store_max) {
387 runmax.add(out.value, out.index, out.number);
388 }
389 }
390
391 if (store_min) {
392 runmin.finish();
393 local_min.transfer();
394 }
395 if (store_max) {
396 runmax.finish();
397 local_max.transfer();
398 }
399 }, dim, ropt.num_threads);
400 }
401
402 } else {
403 if (direct) {
404 tatami::parallelize([&](int, Index_ s, Index_ l) {
405 auto ext = tatami::consecutive_extractor<false>(p, row, s, l);
406 std::vector<Value_> buffer(otherdim);
407 for (Index_ x = 0; x < l; ++x) {
408 auto ptr = ext->fetch(buffer.data());
409 if (store_min) {
410 min_out[x + s] = ranges::direct<true>(ptr, otherdim, ropt.skip_nan);
411 }
412 if (store_max) {
413 max_out[x + s] = ranges::direct<false>(ptr, otherdim, ropt.skip_nan);
414 }
415 }
416 }, dim, ropt.num_threads);
417
418 } else {
419 tatami::parallelize([&](int thread, Index_ s, Index_ l) {
420 auto ext = tatami::consecutive_extractor<false>(p, !row, static_cast<Index_>(0), otherdim, s, l);
421 std::vector<Value_> buffer(l);
422
423 auto local_min = (store_min ? LocalOutputBuffer<Output_>(thread, s, l, min_out) : LocalOutputBuffer<Output_>());
424 auto local_max = (store_max ? LocalOutputBuffer<Output_>(thread, s, l, max_out) : LocalOutputBuffer<Output_>());
425 ranges::RunningDense<true, Output_, Value_, Index_> runmin(l, local_min.data(), ropt.skip_nan);
426 ranges::RunningDense<false, Output_, Value_, Index_> runmax(l, local_max.data(), ropt.skip_nan);
427
428 for (Index_ x = 0; x < otherdim; ++x) {
429 auto ptr = ext->fetch(buffer.data());
430 if (store_min) {
431 runmin.add(ptr);
432 }
433 if (store_max) {
434 runmax.add(ptr);
435 }
436 }
437
438 if (store_min) {
439 runmin.finish();
440 local_min.transfer();
441 }
442 if (store_max) {
443 runmax.finish();
444 local_max.transfer();
445 }
446 }, dim, ropt.num_threads);
447 }
448 }
449
450 return;
451}
452
466template<typename Output_ = double, typename Value_, typename Index_>
467std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
468 std::vector<Output_> mins(p->ncol()), maxs(p->ncol());
469 apply(false, p, mins.data(), maxs.data(), ropt);
470 return std::make_pair(std::move(mins), std::move(maxs));
471}
472
485template<typename Output_ = double, typename Value_, typename Index_>
486std::pair<std::vector<Output_>, std::vector<Output_> > by_column(const tatami::Matrix<Value_, Index_>* p) {
487 return by_column<Output_>(p, Options());
488}
489
503template<typename Output_ = double, typename Value_, typename Index_>
504std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p, const Options& ropt) {
505 std::vector<Output_> mins(p->nrow()), maxs(p->nrow());
506 apply(true, p, mins.data(), maxs.data(), ropt);
507 return std::make_pair(std::move(mins), std::move(maxs));
508}
509
522template<typename Output_ = double, typename Value_, typename Index_>
523std::pair<std::vector<Output_>, std::vector<Output_> > by_row(const tatami::Matrix<Value_, Index_>* p) {
524 return by_row<Output_>(p, Options());
525}
526
527}
528
529}
530
531#endif
virtual Index_ ncol() const=0
virtual Index_ nrow() const=0
virtual bool prefer_rows() const=0
virtual std::unique_ptr< MyopicSparseExtractor< Value_, Index_ > > sparse(bool row, const Options &opt) const=0
Local output buffer for running calculations.
Definition utils.hpp:72
Running minima/maxima from dense data.
Definition ranges.hpp:173
void finish()
Definition ranges.hpp:220
RunningDense(Index_ num, Output_ *store, bool skip_nan)
Definition ranges.hpp:181
void add(const Value_ *ptr)
Definition ranges.hpp:187
Running minima/maxima from sparse data.
Definition ranges.hpp:246
void finish()
Definition ranges.hpp:299
RunningSparse(Index_ num, Output_ *store, bool skip_nan, Index_ subtract=0)
Definition ranges.hpp:257
void add(const Value_ *value, const Index_ *index, Index_ number)
Definition ranges.hpp:266
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_row(const tatami::Matrix< Value_, Index_ > *p, const Options &ropt)
Definition ranges.hpp:504
Value_ direct(const Value_ *ptr, Index_ num, bool skip_nan)
Definition ranges.hpp:97
std::pair< std::vector< Output_ >, std::vector< Output_ > > by_column(const tatami::Matrix< Value_, Index_ > *p, const Options &ropt)
Definition ranges.hpp:467
void apply(bool row, const tatami::Matrix< Value_, Index_ > *p, Output_ *min_out, Output_ *max_out, const Options &ropt)
Definition ranges.hpp:342
Functions to compute statistics from a tatami::Matrix.
Definition counts.hpp:18
void parallelize(Function_ fun, Index_ tasks, int threads)
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
bool sparse_extract_index
bool sparse_ordered_index
Range calculation options.
Definition ranges.hpp:28
bool skip_nan
Definition ranges.hpp:33
int num_threads
Definition ranges.hpp:39
Utilities for computing matrix statistics.