tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
compress_sparse_triplets.hpp
Go to the documentation of this file.
1#ifndef TATAMI_COMPRESS_SPARSE_TRIPLETS_H
2#define TATAMI_COMPRESS_SPARSE_TRIPLETS_H
3
#include "../utils/Index_to_container.hpp"

#include <vector>
#include <algorithm>
#include <numeric>
#include <utility>
#include <cstddef>
#include <stdexcept>

#include "sanisizer/sanisizer.hpp"
13
20namespace tatami {
21
22namespace compress_triplets {
23
// Reports how far a set of triplets is from being sorted by (primary, secondary).
//
// Both containers are indexed with `[]`; `primary` must also provide
// `begin()`, `end()` and `size()`. Only the first `primary.size()` entries of
// `secondary` are ever inspected.
//
// Return codes:
//   0 - `primary` is sorted and `secondary` never decreases inside a run of equal primaries.
//   1 - `primary` is sorted, but `secondary` decreases somewhere inside such a run.
//   2 - `primary` itself is not sorted.
template<class Primary_, class Secondary_>
int is_ordered(const Primary_& primary, const Secondary_& secondary) {
    if (!std::is_sorted(primary.begin(), primary.end())) {
        return 2;
    }

    // With 'primary' known to be sorted, two neighbours belong to the same
    // run exactly when they compare equal, so scanning adjacent pairs is
    // enough to detect a decrease of 'secondary' within any run.
    const auto count = primary.size();
    for (decltype(primary.size()) pos = 1; pos < count; ++pos) {
        const bool same_run = (primary[pos] == primary[pos - 1]);
        if (same_run && secondary[pos] < secondary[pos - 1]) {
            return 1; // first violation is sufficient.
        }
    }

    return 0;
}
49
// Rearranges 'indices' so that reading the triplets through it yields them in
// (primary, secondary) order. 'primary' and 'secondary' themselves are never
// modified; entries of 'indices' are used as positions into both.
//
// 'status' is interpreted like the return value of is_ordered():
//   2 - sort all of 'indices' by primary, breaking ties by secondary.
//   1 - primary is treated as already sorted; each run of equal primaries is
//       sorted by secondary only, using positions [run_begin, run_end) of
//       'indices' for the run at the same positions of 'primary'.
//   anything else - 'indices' is left untouched.
template<typename Size_, class Primary_, class Secondary_>
void order(int status, std::vector<Size_>& indices, const Primary_& primary, const Secondary_& secondary) {
    if (status == 2) {
        const auto by_primary_then_secondary = [&](Size_ lhs, Size_ rhs) -> bool {
            const bool tied = (primary[lhs] == primary[rhs]);
            return tied ? (secondary[lhs] < secondary[rhs]) : (primary[lhs] < primary[rhs]);
        };
        std::sort(indices.begin(), indices.end(), by_primary_then_secondary);
        return;
    }

    if (status != 1) {
        return;
    }

    const auto by_secondary = [&](Size_ lhs, Size_ rhs) -> bool {
        return secondary[lhs] < secondary[rhs];
    };

    const auto total = primary.size();
    for (decltype(primary.size()) run_begin = 0, run_end = 0; run_begin < total; run_begin = run_end) {
        // Extend the run until the primary value changes.
        run_end = run_begin + 1;
        while (run_end < total && primary[run_end] == primary[run_begin]) {
            ++run_end;
        }

        // Runs whose secondaries are already in order are skipped entirely.
        if (std::is_sorted(secondary.begin() + run_begin, secondary.begin() + run_end)) {
            continue;
        }
        std::sort(indices.begin() + run_begin, indices.begin() + run_end, by_secondary);
    }
}
83}
84
105template<class Values_, class RowIndices_, class ColumnIndices_>
106std::vector<decltype(std::declval<Values_>().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices, bool csr) {
107 // We use decltype(N) as the return type to match the size_type of the input containers, which might not be size_t for arbitrary containers.
108 auto N = values.size();
109 if (!safe_non_negative_equal(N, row_indices.size()) || !safe_non_negative_equal(N, column_indices.size())) {
110 throw std::runtime_error("'row_indices', 'column_indices' and 'values' should have the same length");
111 }
112
113 int order_status = 0;
114 if (csr) {
115 order_status = compress_triplets::is_ordered(row_indices, column_indices);
116 } else {
117 order_status = compress_triplets::is_ordered(column_indices, row_indices);
118 }
119
120 if (order_status != 0) {
121 auto indices = sanisizer::create<std::vector<decltype(N)> >(N);
122 std::iota(indices.begin(), indices.end(), static_cast<decltype(N)>(0));
123
124 // Sorting without duplicating the data.
125 if (csr) {
126 compress_triplets::order(order_status, indices, row_indices, column_indices);
127 } else {
128 compress_triplets::order(order_status, indices, column_indices, row_indices);
129 }
130
131 // Reordering values in place. This (i) saves memory, and (ii) allows
132 // us to work with Values_, RowIndices_, etc. that may not have well-defined copy
133 // constructors (e.g., if they refer to external memory).
134 auto used = sanisizer::create<std::vector<unsigned char> >(N);
135 for (decltype(N) i = 0; i < N; ++i) {
136 if (used[i]) {
137 continue;
138 }
139 auto current = i, replacement = indices[i];
140 used[i] = 1;
141
142 while (replacement != i) {
143 std::swap(row_indices[current], row_indices[replacement]);
144 std::swap(column_indices[current], column_indices[replacement]);
145 std::swap(values[current], values[replacement]);
146
147 current = replacement;
148 used[current] = 1;
149 replacement = indices[replacement];
150 }
151 }
152 }
153
154 // Collating the indices.
155 typedef std::vector<decltype(N)> Output;
156 typedef typename Output::size_type OutputSize;
157 Output output(sanisizer::sum<OutputSize>(csr ? nrow : ncol, 1));
158 if (csr) {
159 for (auto t : row_indices) {
160 ++(output[static_cast<OutputSize>(t) + 1]);
161 }
162 } else {
163 for (auto t : column_indices) {
164 ++(output[static_cast<OutputSize>(t) + 1]);
165 }
166 }
167 std::partial_sum(output.begin(), output.end(), output.begin());
168
169 return output;
170}
171
// Back-compatibility overload that takes the primary dimension as the
// 'row_' template argument instead of a run-time flag. It simply forwards to
// the run-time overload, passing 'row_' as the 'csr' argument.
template<bool row_, class Values_, class RowIndices_, class ColumnIndices_>
auto compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices) {
    constexpr bool csr = row_;
    return compress_sparse_triplets(nrow, ncol, values, row_indices, column_indices, csr);
}
184}
185
186#endif
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::vector< decltype(std::declval< Values_ >().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_ &values, RowIndices_ &row_indices, ColumnIndices_ &column_indices, bool csr)
Definition compress_sparse_triplets.hpp:106