tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
compress_sparse_triplets.hpp
Go to the documentation of this file.
1#ifndef TATAMI_COMPRESS_SPARSE_TRIPLETS_H
2#define TATAMI_COMPRESS_SPARSE_TRIPLETS_H
3
4#include <vector>
5#include <algorithm>
6#include <numeric>
7#include <utility>
8#include <cstddef>
9
11
18namespace tatami {
19
20namespace compress_triplets {
21
25template<class Primary_, class Secondary_>
26int is_ordered(const Primary_& primary, const Secondary_& secondary) {
27 if (!std::is_sorted(primary.begin(), primary.end())) {
28 return 2;
29 }
30
31 auto nprimary = primary.size();
32 decltype(nprimary) start = 0;
33 while (start < nprimary) {
34 decltype(nprimary) end = start + 1;
35 while (end < nprimary && primary[end] == primary[start]) {
36 if (secondary[end] < secondary[end - 1]) {
37 // Quit on first failure; we've seen enough.
38 return 1;
39 }
40 ++end;
41 }
42 start = end;
43 }
44
45 return 0;
46}
47
48template<typename Size_, class Primary_, class Secondary_>
49void order(int status, std::vector<Size_>& indices, const Primary_& primary, const Secondary_& secondary) {
50 if (status == 1) {
51 auto nprimary = primary.size();
52 decltype(nprimary) start = 0;
53 while (start < nprimary) {
54 decltype(nprimary) end = start + 1;
55 while (end < nprimary && primary[end] == primary[start]) {
56 ++end;
57 }
58
59 // Checking if this particular run can be skipped.
60 if (!std::is_sorted(secondary.begin() + start, secondary.begin() + end)) {
61 std::sort(indices.begin() + start, indices.begin() + end, [&](Size_ left, Size_ right) -> bool {
62 return secondary[left] < secondary[right];
63 });
64 }
65 start = end;
66 }
67
68 } else if (status == 2) {
69 std::sort(indices.begin(), indices.end(), [&](Size_ left, Size_ right) -> bool {
70 if (primary[left] == primary[right]) {
71 return (secondary[left] < secondary[right]);
72 }
73 return (primary[left] < primary[right]);
74 });
75 }
76}
81}
82
103template<class Values_, class RowIndices_, class ColumnIndices_>
104std::vector<decltype(std::declval<Values_>().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices, bool csr) {
105 // We use decltype(N) as the return type to match the size_type of the input containers, which might not be size_t for arbitrary containers.
106 auto N = values.size();
107 if (!safe_non_negative_equal(N, row_indices.size()) || !safe_non_negative_equal(N, column_indices.size())) {
108 throw std::runtime_error("'row_indices', 'column_indices' and 'values' should have the same length");
109 }
110
111 int order_status = 0;
112 if (csr) {
113 order_status = compress_triplets::is_ordered(row_indices, column_indices);
114 } else {
115 order_status = compress_triplets::is_ordered(column_indices, row_indices);
116 }
117
118 if (order_status != 0) {
119 std::vector<decltype(N)> indices(N);
120 std::iota(indices.begin(), indices.end(), static_cast<decltype(N)>(0));
121
122 // Sorting without duplicating the data.
123 if (csr) {
124 compress_triplets::order(order_status, indices, row_indices, column_indices);
125 } else {
126 compress_triplets::order(order_status, indices, column_indices, row_indices);
127 }
128
129 // Reordering values in place. This (i) saves memory, and (ii) allows
130 // us to work with Values_, RowIndices_, etc. that may not have well-defined copy
131 // constructors (e.g., if they refer to external memory).
132 std::vector<unsigned char> used(N);
133 for (decltype(N) i = 0; i < N; ++i) {
134 if (used[i]) {
135 continue;
136 }
137 auto current = i, replacement = indices[i];
138 used[i] = 1;
139
140 while (replacement != i) {
141 std::swap(row_indices[current], row_indices[replacement]);
142 std::swap(column_indices[current], column_indices[replacement]);
143 std::swap(values[current], values[replacement]);
144
145 current = replacement;
146 used[current] = 1;
147 replacement = indices[replacement];
148 }
149 }
150 }
151
152 // Collating the indices.
153 std::vector<decltype(N)> output(csr ? nrow + 1 : ncol + 1);
154 if (csr) {
155 for (auto t : row_indices) {
156 ++(output[t+1]);
157 }
158 } else {
159 for (auto t : column_indices) {
160 ++(output[t+1]);
161 }
162 }
163 std::partial_sum(output.begin(), output.end(), output.begin());
164
165 return output;
166}
167
171// Back-compatibility.
172template<bool row_, class Values_, class RowIndices_, class ColumnIndices_>
173auto compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices) {
174 return compress_sparse_triplets(nrow, ncol, values, row_indices, column_indices, row_);
175}
180}
181
182#endif
Sign-aware integer comparisons.
Flexible representations for matrix data.
Definition Extractor.hpp:15
bool safe_non_negative_equal(Left_ l, Right_ r)
Definition integer_comparisons.hpp:23
std::vector< decltype(std::declval< Values_ >().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_ &values, RowIndices_ &row_indices, ColumnIndices_ &column_indices, bool csr)
Definition compress_sparse_triplets.hpp:104