tatami
C++ API for different matrix representations
Loading...
Searching...
No Matches
compress_sparse_triplets.hpp
Go to the documentation of this file.
1#ifndef TATAMI_COMPRESS_SPARSE_TRIPLETS_H
2#define TATAMI_COMPRESS_SPARSE_TRIPLETS_H
3
4#include <vector>
5#include <algorithm>
6#include <numeric>
7#include <utility>
8#include <cstddef>
9
10#include "sanisizer/sanisizer.hpp"
11
12#include "../utils/copy.hpp"
13
20namespace tatami {
21
51template<typename Pointer_ = std::size_t, typename Extent_, class Values_, class PrimaryIndices_, class SecondaryIndices_>
52std::vector<Pointer_> compress_sparse_triplets(const Extent_ num_primary, Values_& values, const PrimaryIndices_& primary_indices, SecondaryIndices_& secondary_indices) {
53 const auto num_triplets = values.size();
54 if (!sanisizer::is_equal(num_triplets, primary_indices.size()) || !sanisizer::is_equal(num_triplets, secondary_indices.size())) {
55 throw std::runtime_error("'primary_indices', 'secondary_indices' and 'values' should have the same length");
56 }
57 sanisizer::cast<Pointer_>(num_triplets); // check that additions and cumulative sums will not overflow the Pointer_ type.
58
59 typedef std::vector<Pointer_> Output;
60 typedef typename Output::size_type OutputSize;
61 Output ptrs(sanisizer::sum<OutputSize>(num_primary, 1));
62 for (const auto p : primary_indices) {
63 ++ptrs[sanisizer::sum_unsafe<OutputSize>(p, 1)];
64 }
65 std::partial_sum(ptrs.begin(), ptrs.end(), ptrs.begin());
66
67 if (!std::is_sorted(primary_indices.begin(), primary_indices.end())) {
68 std::vector<Pointer_> copy(ptrs.begin(), ptrs.begin() + num_primary); // don't need the last element.
69 auto triplet_indices = sanisizer::create<std::vector<I<decltype(num_triplets)> > >(num_triplets);
70 for (I<decltype(num_triplets)> t = 0; t < num_triplets; ++t) {
71 auto& offset = copy[primary_indices[t]];
72 triplet_indices[offset] = t;
73 ++offset;
74 }
75
76 // Reordering in-place so that triplets are sorted by primary index.
77 // Ordering of secondary_indices is not yet guaranteed and will be handled later.
78 for (I<decltype(num_triplets)> i = 0; i < num_triplets; ++i) {
79 if (triplet_indices[i] == num_triplets) {
80 continue;
81 }
82
83 const auto cur_second = secondary_indices[i];
84 const auto cur_value = values[i];
85 auto current = i, replacement = triplet_indices[current];
86
87 while (replacement != i) {
88 secondary_indices[current] = secondary_indices[replacement];
89 values[current] = values[replacement];
90 triplet_indices[current] = num_triplets;
91 current = replacement;
92 replacement = triplet_indices[replacement];
93 }
94
95 secondary_indices[current] = cur_second;
96 values[current] = cur_value;
97 triplet_indices[current] = num_triplets;
98 }
99 }
100
101 std::vector<std::pair<I<decltype(secondary_indices[0])>, I<decltype(values[0])> > > sortspace;
102 for (Extent_ p = 0; p < num_primary; ++p) {
103 const auto start = ptrs[p];
104 const auto end = ptrs[p + 1]; // + 1 is safe as p < num_primary, which fits in an Extent_.
105 if (std::is_sorted(secondary_indices.begin() + start, secondary_indices.begin() + end)) {
106 continue;
107 }
108
109 sortspace.clear();
110 sortspace.reserve(end - start);
111 for (Pointer_ x = start; x < end; ++x) {
112 sortspace.emplace_back(secondary_indices[x], values[x]);
113 }
114 std::sort(sortspace.begin(), sortspace.end());
115
116 auto ssIt = sortspace.begin();
117 for (Pointer_ x = start; x < end; ++x, ++ssIt) {
118 secondary_indices[x] = ssIt->first;
119 values[x] = ssIt->second;
120 }
121 }
122
123 return ptrs;
124}
125
129// Back-compatibility.
130template<class Values_, class RowIndices_, class ColumnIndices_>
131std::vector<decltype(std::declval<Values_>().size())> compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices, bool csr) {
132 if (csr) {
133 auto output = compress_sparse_triplets(nrow, values, row_indices, column_indices);
134 for (decltype(nrow) r = 0; r < nrow; ++r) {
135 std::fill_n(row_indices.begin() + output[r], output[r + 1] - output[r], r);
136 }
137 return output;
138 } else {
139 auto output = compress_sparse_triplets(ncol, values, column_indices, row_indices);
140 for (decltype(ncol) c = 0; c < ncol; ++c) {
141 std::fill_n(column_indices.begin() + output[c], output[c + 1] - output[c], c);
142 }
143 return output;
144 }
145}
146
147template<bool row_, class Values_, class RowIndices_, class ColumnIndices_>
148auto compress_sparse_triplets(std::size_t nrow, std::size_t ncol, Values_& values, RowIndices_& row_indices, ColumnIndices_& column_indices) {
149 return compress_sparse_triplets(nrow, ncol, values, row_indices, column_indices, row_);
150}
155}
156
157#endif
Copy data from one buffer to another.
Flexible representations for matrix data.
Definition Extractor.hpp:15
std::vector< Pointer_ > compress_sparse_triplets(const Extent_ num_primary, Values_ &values, const PrimaryIndices_ &primary_indices, SecondaryIndices_ &secondary_indices)
Definition compress_sparse_triplets.hpp:52