tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
compress_sparse_triplets.hpp
Go to the documentation of this file.
1#ifndef TATAMI_COMPRESS_SPARSE_TRIPLETS_H
2#define TATAMI_COMPRESS_SPARSE_TRIPLETS_H
3
4#include "../utils/Index_to_container.hpp"
5
6#include <vector>
7#include <algorithm>
8#include <numeric>
9#include <utility>
10#include <cstddef>
11
12#include "sanisizer/sanisizer.hpp"
13
20namespace tatami {
21
25namespace compress_triplets {
26
27template<class Primary_, class Secondary_>
28int is_ordered(const Primary_& primary, const Secondary_& secondary) {
29 if (!std::is_sorted(primary.begin(), primary.end())) {
30 return 2;
31 }
32
33 auto nprimary = primary.size();
34 decltype(nprimary) start = 0;
35 while (start < nprimary) {
36 decltype(nprimary) end = start + 1;
37 while (end < nprimary && primary[end] == primary[start]) {
38 if (secondary[end] < secondary[end - 1]) {
39 // Quit on first failure; we've seen enough.
40 return 1;
41 }
42 ++end;
43 }
44 start = end;
45 }
46
47 return 0;
48}
49
50template<typename Size_, class Primary_, class Secondary_>
51void order(int status, std::vector<Size_>& indices, const Primary_& primary, const Secondary_& secondary) {
52 if (status == 1) {
53 auto nprimary = primary.size();
54 decltype(nprimary) start = 0;
55 while (start < nprimary) {
56 decltype(nprimary) end = start + 1;
57 while (end < nprimary && primary[end] == primary[start]) {
58 ++end;
59 }
60
61 // Checking if this particular run can be skipped.
62 if (!std::is_sorted(secondary.begin() + start, secondary.begin() + end)) {
63 std::sort(indices.begin() + start, indices.begin() + end, [&](Size_ left, Size_ right) -> bool {
64 return secondary[left] < secondary[right];
65 });
66 }
67 start = end;
68 }
69
70 } else if (status == 2) {
71 std::sort(indices.begin(), indices.end(), [&](Size_ left, Size_ right) -> bool {
72 if (primary[left] == primary[right]) {
73 return (secondary[left] < secondary[right]);
74 }
75 return (primary[left] < primary[right]);
76 });
77 }
78}
79
80}
113template<class Values_, typename Pointer_ = decltype(std::declval<Values_>().size()), class PrimaryIndices_, class SecondaryIndices_>
114std::vector<Pointer_> compress_sparse_triplets(std::size_t num_primary, Values_& values, const PrimaryIndices_& primary_indices, SecondaryIndices_& secondary_indices) {
115 auto N = values.size();
116 if (!safe_non_negative_equal(N, primary_indices.size()) || !safe_non_negative_equal(N, secondary_indices.size())) {
117 throw std::runtime_error("'primary_indices', 'secondary_indices' and 'values' should have the same length");
118 }
119
120 int order_status = compress_triplets::is_ordered(primary_indices, secondary_indices);
121 if (order_status != 0) {
122 auto indices = sanisizer::create<std::vector<decltype(N)> >(N);
123 std::iota(indices.begin(), indices.end(), static_cast<decltype(N)>(0));
124 compress_triplets::order(order_status, indices, primary_indices, secondary_indices);
125
126 // Reordering values in place. This saves memory and allows us to work with Values_, RowIndices_, etc. that may not have well-defined copy constructors.
127 auto used = sanisizer::create<std::vector<unsigned char> >(N);
128 for (decltype(N) i = 0; i < N; ++i) {
129 if (used[i]) {
130 continue;
131 }
132 auto current = i, replacement = indices[i];
133 used[i] = 1;
134
135 while (replacement != i) {
136 std::swap(secondary_indices[current], secondary_indices[replacement]);
137 std::swap(values[current], values[replacement]);
138 current = replacement;
139 used[current] = 1;
140 replacement = indices[replacement];
141 }
142 }
143 }
144
145 typedef std::vector<Pointer_> Output;
146 typedef typename Output::size_type OutputSize;
147 Output output(sanisizer::sum<OutputSize>(num_primary, 1));
148 for (auto t : primary_indices) {
149 ++(output[static_cast<OutputSize>(t) + 1]);
150 }
151 std::partial_sum(output.begin(), output.end(), output.begin());
152
153 return output;
154}
155
159// Back-compatibility.
160template<class Values_, class RowIndices_, class ColumnIndices_>
161std::vector<decltype(std::declval<Values_>().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices, bool csr) {
162 if (csr) {
163 auto output = compress_sparse_triplets(nrow, values, row_indices, column_indices);
164 for (decltype(nrow) r = 0; r < nrow; ++r) {
165 std::fill_n(row_indices.begin() + output[r], output[r + 1] - output[r], r);
166 }
167 return output;
168 } else {
169 auto output = compress_sparse_triplets(ncol, values, column_indices, row_indices);
170 for (decltype(ncol) c = 0; c < ncol; ++c) {
171 std::fill_n(column_indices.begin() + output[c], output[c + 1] - output[c], c);
172 }
173 return output;
174 }
175}
176
177template<bool row_, class Values_, class RowIndices_, class ColumnIndices_>
178auto compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices) {
179 return compress_sparse_triplets(nrow, ncol, values, row_indices, column_indices, row_);
180}
185}
186
187#endif
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::vector< Pointer_ > compress_sparse_triplets(std::size_t num_primary, Values_ &values, const PrimaryIndices_ &primary_indices, SecondaryIndices_ &secondary_indices)
Definition compress_sparse_triplets.hpp:114