tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
compress_sparse_triplets.hpp
Go to the documentation of this file.
1#ifndef TATAMI_COMPRESS_SPARSE_TRIPLETS_H
2#define TATAMI_COMPRESS_SPARSE_TRIPLETS_H
3
4#include <vector>
5#include <algorithm>
6#include <numeric>
7
14namespace tatami {
15
16namespace compress_triplets {
17
21template<class Primary, class Secondary>
22int is_ordered(const Primary& primary, const Secondary& secondary) {
23 if (!std::is_sorted(primary.begin(), primary.end())) {
24 return 2;
25 }
26
27 size_t start = 0;
28 while (start < primary.size()) {
29 size_t end = start + 1;
30 while (end < primary.size() && primary[end] == primary[start]) {
31 if (secondary[end] < secondary[end - 1]) {
32 // Quit on first failure; we've seen enough.
33 return 1;
34 }
35 ++end;
36 }
37 start = end;
38 }
39
40 return 0;
41}
42
43template<class Primary, class Secondary>
44void order(int status, std::vector<size_t>& indices, const Primary& primary, const Secondary& secondary) {
45 if (status == 1) {
46 size_t start = 0;
47 while (start < primary.size()) {
48 size_t end = start + 1;
49 while (end < primary.size() && primary[end] == primary[start]) {
50 ++end;
51 }
52
53 // Checking if this particular run can be skipped.
54 if (!std::is_sorted(secondary.begin() + start, secondary.begin() + end)) {
55 std::sort(indices.begin() + start, indices.begin() + end, [&](size_t left, size_t right) -> bool {
56 return secondary[left] < secondary[right];
57 });
58 }
59 start = end;
60 }
61
62 } else if (status == 2) {
63 std::sort(indices.begin(), indices.end(), [&](size_t left, size_t right) -> bool {
64 if (primary[left] == primary[right]) {
65 return (secondary[left] < secondary[right]);
66 }
67 return (primary[left] < primary[right]);
68 });
69 }
70}
75}
76
97template<class Values_, class RowIndices_, class ColumnIndices_>
98std::vector<size_t> compress_sparse_triplets(size_t nrow, size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices, bool csr) {
99 const size_t N = row_indices.size();
100 if (N != column_indices.size() || values.size() != N) {
101 throw std::runtime_error("'row_indices', 'column_indices' and 'values' should have the same length");
102 }
103
104 int order_status = 0;
105 if (csr) {
106 order_status = compress_triplets::is_ordered(row_indices, column_indices);
107 } else {
108 order_status = compress_triplets::is_ordered(column_indices, row_indices);
109 }
110
111 if (order_status != 0) {
112 std::vector<size_t> indices(N);
113 for (size_t i = 0; i < N; ++i) {
114 indices[i] = i;
115 }
116
117 // Sorting without duplicating the data.
118 if (csr) {
119 compress_triplets::order(order_status, indices, row_indices, column_indices);
120 } else {
121 compress_triplets::order(order_status, indices, column_indices, row_indices);
122 }
123
124 // Reordering values in place. This (i) saves memory, and (ii) allows
125 // us to work with Values_, RowIndices_, etc. that may not have well-defined copy
126 // constructors (e.g., if they refer to external memory).
127 for (size_t i = 0; i < indices.size(); ++i) {
128 if (indices[i] == static_cast<size_t>(-1)) {
129 continue;
130 }
131
132 size_t current = i, replacement = indices[i];
133 indices[i] = -1;
134
135 while (replacement != i) {
136 std::swap(row_indices[current], row_indices[replacement]);
137 std::swap(column_indices[current], column_indices[replacement]);
138 std::swap(values[current], values[replacement]);
139
140 current = replacement;
141 auto next_replacement = indices[replacement];
142 indices[replacement] = -1;
143 replacement = next_replacement;
144 }
145 }
146 }
147
148 // Collating the indices.
149 std::vector<size_t> output(csr ? nrow + 1 : ncol + 1);
150 if (csr) {
151 for (auto t : row_indices) {
152 ++(output[t+1]);
153 }
154 } else {
155 for (auto t : column_indices) {
156 ++(output[t+1]);
157 }
158 }
159 std::partial_sum(output.begin(), output.end(), output.begin());
160
161 return output;
162}
163
167// Back-compatibility.
168template<bool row_, class Values_, class RowIndices_, class ColumnIndices_>
169std::vector<size_t> compress_sparse_triplets(size_t nrow, size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices) {
170 return compress_sparse_triplets(nrow, ncol, values, row_indices, column_indices, row_);
171}
176}
177
178#endif
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::vector< size_t > compress_sparse_triplets(size_t nrow, size_t ncol, Values_ &values, RowIndices_ &row_indices, ColumnIndices_ &column_indices, bool csr)
Definition compress_sparse_triplets.hpp:98
auto consecutive_extractor(const Matrix< Value_, Index_ > *mat, bool row, Index_ iter_start, Index_ iter_length, Args_ &&... args)
Definition consecutive_extractor.hpp:35