tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
compress_sparse_triplets.hpp
Go to the documentation of this file.
1#ifndef TATAMI_COMPRESS_SPARSE_TRIPLETS_H
2#define TATAMI_COMPRESS_SPARSE_TRIPLETS_H
3
4#include <vector>
5#include <algorithm>
6#include <numeric>
7#include <utility>
8#include <cstddef>
9
10#include "sanisizer/sanisizer.hpp"
11
12#include "../utils/Index_to_container.hpp"
13#include "../utils/copy.hpp"
14
21namespace tatami {
22
26namespace compress_triplets {
27
28template<class Primary_, class Secondary_>
29int is_ordered(const Primary_& primary, const Secondary_& secondary) {
30 if (!std::is_sorted(primary.begin(), primary.end())) {
31 return 2;
32 }
33
34 const auto nprimary = primary.size();
35 I<decltype(nprimary)> start = 0;
36 while (start < nprimary) {
37 I<decltype(nprimary)> end = start + 1;
38 while (end < nprimary && primary[end] == primary[start]) {
39 if (secondary[end] < secondary[end - 1]) {
40 // Quit on first failure; we've seen enough.
41 return 1;
42 }
43 ++end;
44 }
45 start = end;
46 }
47
48 return 0;
49}
50
51template<typename Size_, class Primary_, class Secondary_>
52void order(const int status, std::vector<Size_>& indices, const Primary_& primary, const Secondary_& secondary) {
53 if (status == 1) {
54 const auto nprimary = primary.size();
55 I<decltype(nprimary)> start = 0;
56 while (start < nprimary) {
57 I<decltype(nprimary)> end = start + 1;
58 while (end < nprimary && primary[end] == primary[start]) {
59 ++end;
60 }
61
62 // Checking if this particular run can be skipped.
63 if (!std::is_sorted(secondary.begin() + start, secondary.begin() + end)) {
64 std::sort(
65 indices.begin() + start,
66 indices.begin() + end,
67 [&](const Size_ left, const Size_ right) -> bool {
68 return secondary[left] < secondary[right];
69 }
70 );
71 }
72 start = end;
73 }
74
75 } else if (status == 2) {
76 std::sort(
77 indices.begin(),
78 indices.end(),
79 [&](const Size_ left, const Size_ right) -> bool {
80 if (primary[left] == primary[right]) {
81 return (secondary[left] < secondary[right]);
82 }
83 return (primary[left] < primary[right]);
84 }
85 );
86 }
87}
88
89}
122template<class Values_, typename Pointer_ = I<decltype(std::declval<Values_>().size())>, class PrimaryIndices_, class SecondaryIndices_>
123std::vector<Pointer_> compress_sparse_triplets(std::size_t num_primary, Values_& values, const PrimaryIndices_& primary_indices, SecondaryIndices_& secondary_indices) {
124 const auto N = values.size();
125 if (!safe_non_negative_equal(N, primary_indices.size()) || !safe_non_negative_equal(N, secondary_indices.size())) {
126 throw std::runtime_error("'primary_indices', 'secondary_indices' and 'values' should have the same length");
127 }
128
129 const int order_status = compress_triplets::is_ordered(primary_indices, secondary_indices);
130 if (order_status != 0) {
131 auto indices = sanisizer::create<std::vector<I<decltype(N)> > >(N);
132 std::iota(indices.begin(), indices.end(), static_cast<I<decltype(N)> >(0));
133 compress_triplets::order(order_status, indices, primary_indices, secondary_indices);
134
135 // Reordering values in place. This saves memory and allows us to work with Values_, RowIndices_, etc. that may not have well-defined copy constructors.
136 auto used = sanisizer::create<std::vector<unsigned char> >(N);
137 for (I<decltype(N)> i = 0; i < N; ++i) {
138 if (used[i]) {
139 continue;
140 }
141 auto current = i, replacement = indices[i];
142 used[i] = 1;
143
144 while (replacement != i) {
145 std::swap(secondary_indices[current], secondary_indices[replacement]);
146 std::swap(values[current], values[replacement]);
147 current = replacement;
148 used[current] = 1;
149 replacement = indices[replacement];
150 }
151 }
152 }
153
154 typedef std::vector<Pointer_> Output;
155 typedef typename Output::size_type OutputSize;
156 Output output(sanisizer::sum<OutputSize>(num_primary, 1));
157 for (const auto t : primary_indices) {
158 ++(output[sanisizer::sum_unsafe<OutputSize>(t, 1)]);
159 }
160 std::partial_sum(output.begin(), output.end(), output.begin());
161
162 return output;
163}
164
168// Back-compatibility.
169template<class Values_, class RowIndices_, class ColumnIndices_>
170std::vector<decltype(std::declval<Values_>().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices, bool csr) {
171 if (csr) {
172 auto output = compress_sparse_triplets(nrow, values, row_indices, column_indices);
173 for (decltype(nrow) r = 0; r < nrow; ++r) {
174 std::fill_n(row_indices.begin() + output[r], output[r + 1] - output[r], r);
175 }
176 return output;
177 } else {
178 auto output = compress_sparse_triplets(ncol, values, column_indices, row_indices);
179 for (decltype(ncol) c = 0; c < ncol; ++c) {
180 std::fill_n(column_indices.begin() + output[c], output[c + 1] - output[c], c);
181 }
182 return output;
183 }
184}
185
186template<bool row_, class Values_, class RowIndices_, class ColumnIndices_>
187auto compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices) {
188 return compress_sparse_triplets(nrow, ncol, values, row_indices, column_indices, row_);
189}
194}
195
196#endif
Copy data from one buffer to another.
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::vector< Pointer_ > compress_sparse_triplets(std::size_t num_primary, Values_ &values, const PrimaryIndices_ &primary_indices, SecondaryIndices_ &secondary_indices)
Definition compress_sparse_triplets.hpp:123