tatami_chunked
Helpers to create custom chunked tatami matrices
Loading...
Searching...
No Matches
mock_sparse_chunk.hpp
Go to the documentation of this file.
1#ifndef TATAMI_CHUNKED_MOCK_SPARSE_CHUNK_HPP
2#define TATAMI_CHUNKED_MOCK_SPARSE_CHUNK_HPP
3
4#include <vector>
5#include <cstdint>
#include <algorithm> // std::lower_bound, std::copy (used below; previously pulled in only transitively)
#include <cstddef> // size_t
6
12namespace tatami_chunked {
13
17namespace MockSparseChunk_internal {
18
// Scratch buffers holding a chunk after "inflation" into compressed-sparse
// form. One instance is re-used across extract() calls so that allocations
// are amortized over many extractions.
19template<typename InflatedValue_, typename InflatedIndex_>
20struct Workspace {
21 // Standard compressed sparse members:
22 std::vector<InflatedValue_> values;
23 std::vector<InflatedIndex_> indices;
24 std::vector<size_t> indptrs;
// Note: Core reads indptrs[p] and indptrs[p + 1] for each primary element p,
// so indptrs must have (primary extent + 1) entries, CSR/CSC-style.
25
26 // Allocation to allow for O(1) mapping of requested indices to sparse indices.
27 // This mimics what is done in the indexed sparse extractors in tatami proper.
28 std::vector<uint8_t> remap;
29};
30
// Common engine behind the mock sparse chunk classes. It owns a sparse
// "blob" (any type providing nrow(), ncol(), is_csr() and inflate()) and
// implements the four extraction patterns: {contiguous block, indexed
// subset} on the target dimension x {contiguous block, indexed subset} on
// the non-target dimension. Results are appended to per-target-element
// output buffers; 'shift' is added to every reported non-target index
// (presumably converting chunk-local to global coordinates — confirm
// against the CustomSparseChunkedMatrix caller).
31template<class Blob_>
32class Core {
33public:
34 Core() = default;

35
36 Core(Blob_ chunk) : my_chunk(std::move(chunk)) {}
37
38private:
39 Blob_ my_chunk;
40
41 typedef typename Blob_::value_type value_type;
42
43 typedef typename Blob_::index_type index_type;
44
45public:
    // Extent of the requested (target) dimension: rows if row = true.
46 auto get_target_chunkdim(bool row) const {
47 if (row) {
48 return my_chunk.nrow();
49 } else {
50 return my_chunk.ncol();
51 }
52 }
53
    // Extent of the other (non-target) dimension.
54 auto get_non_target_chunkdim(bool row) const {
55 if (row) {
56 return my_chunk.ncol();
57 } else {
58 return my_chunk.nrow();
59 }
60 }
61
62 typedef Workspace<value_type, index_type> MyWorkspace;
63
64private:
    // Narrow [start, end) — a range of positions into 'indices' belonging to
    // one primary element — down to those entries whose (sorted) index value
    // lies in [desired_start, desired_end). Binary search on both ends, with
    // a cheap special case when exactly one index is requested.
65 template<typename Index_>
66 static void refine_start_and_end(size_t& start, size_t& end, Index_ desired_start, Index_ desired_end, Index_ max_end, const std::vector<index_type>& indices) {
67 if (desired_start) {
68 auto it = indices.begin();
69 // Using custom comparator to ensure that we cast to Index_ for signedness-safe comparisons.
70 start = std::lower_bound(it + start, it + end, desired_start, [](Index_ a, Index_ b) -> bool { return a < b; }) - it;
71 }
72
73 if (desired_end != max_end) {
74 if (desired_end == desired_start + 1) {
            // Single-element request: either indices[start] is exactly the
            // requested value (keep one entry) or it is absent (keep none).
75 if (start != end && static_cast<Index_>(indices[start]) == desired_start) {
76 end = start + 1;
77 } else {
78 end = start;
79 }
80 } else {
81 auto it = indices.begin();
82 end = std::lower_bound(it + start, it + end, desired_end, [](Index_ a, Index_ b) -> bool { return a < b; }) - it;
83 }
84 }
85 }
86
87 // Building a present/absent mapping for the requested indices to allow O(1) look-up from the sparse matrix indices.
88 template<typename Index_>
89 static void configure_remap(MyWorkspace& work, const std::vector<Index_>& indices, size_t full) {
90 work.remap.resize(full);
91 for (auto i : indices) {
92 work.remap[i] = 1;
93 }
94 }
95
96 // Resetting just the affected indices so we can avoid a fill operation over the entire array.
    // Must be called after every configure_remap() so that remap is all-zero
    // (beyond possible resizing) for the next indexed extraction.
97 template<typename Index_>
98 static void reset_remap(MyWorkspace& work, const std::vector<Index_>& indices) {
99 for (auto i : indices) {
100 work.remap[i] = 0;
101 }
102 }
103
104private:
    // Used when the blob's primary dimension matches the target dimension:
    // append the entries of primary element 'p' that fall in the requested
    // non-target range (is_block_ = true) or that are flagged in work.remap
    // (is_block_ = false) to output slot 'p'. output_number[p] is advanced
    // by the number of entries written; values/indices are skipped when the
    // corresponding output vector is empty.
105 template<bool is_block_, typename Index_>
106 void fill_target(
107 Index_ p,
108 Index_ non_target_start,
109 Index_ non_target_end,
110 Index_ non_target_chunkdim,
111 MyWorkspace& work,
112 const std::vector<value_type*>& output_values,
113 const std::vector<Index_*>& output_indices,
114 Index_* output_number,
115 Index_ shift)
116 const {
117 size_t start = work.indptrs[p], end = work.indptrs[p + 1];
118 if (start >= end) {
119 return;
120 }
121
122 refine_start_and_end(start, end, non_target_start, non_target_end, non_target_chunkdim, work.indices);
123
124 auto& current_number = output_number[p];
125 const bool needs_value = !output_values.empty();
126 auto vptr = needs_value ? output_values[p] + current_number : NULL;
127 const bool needs_index = !output_indices.empty();
128 auto iptr = needs_index ? output_indices[p] + current_number : NULL;
129
130 if constexpr(is_block_) {
131 if (needs_value) {
132 std::copy(work.values.begin() + start, work.values.begin() + end, vptr);
133 }
134 if (needs_index) {
135 for (size_t i = start; i < end; ++i, ++iptr) {
136 *iptr = work.indices[i] + shift;
137 }
138 }
139 current_number += end - start;
140
141 } else {
142 // Assumes that work.remap has been properly configured, see configure_remap().
143 for (size_t i = start; i < end; ++i) {
144 Index_ target = work.indices[i];
145 if (work.remap[target]) {
146 if (needs_value) {
147 *vptr = work.values[i];
148 ++vptr;
149 }
150 if (needs_index) {
151 *iptr = target + shift;
152 ++iptr;
153 }
154 ++current_number;
155 }
156 }
157 }
158 }
159
    // Used when the blob's primary dimension is the *non*-target dimension:
    // walk secondary element 's' and scatter each entry into the output slot
    // of the target element it belongs to, effectively transposing on the
    // fly. Same block-vs-remap semantics as fill_target().
160 template<bool is_block_, typename Index_>
161 void fill_secondary(
162 Index_ s,
163 Index_ target_start,
164 Index_ target_end,
165 Index_ target_chunkdim,
166 MyWorkspace& work,
167 const std::vector<value_type*>& output_values,
168 const std::vector<Index_*>& output_indices,
169 Index_* output_number,
170 Index_ shift)
171 const {
172 auto start = work.indptrs[s], end = work.indptrs[s + 1];
173 if (start >= end) {
174 return;
175 }
176
177 refine_start_and_end(start, end, target_start, target_end, target_chunkdim, work.indices);
178
179 bool needs_value = !output_values.empty();
180 bool needs_index = !output_indices.empty();
181
182 if constexpr(is_block_) {
183 for (size_t i = start; i < end; ++i) {
184 auto p = work.indices[i];
185 auto& num = output_number[p];
186 if (needs_value) {
187 output_values[p][num] = work.values[i];
188 }
189 if (needs_index) {
190 output_indices[p][num] = s + shift;
191 }
192 ++num;
193 }
194
195 } else {
196 // Assumes that work.remap has been properly configured, see configure_remap().
197 for (size_t i = start; i < end; ++i) {
198 Index_ target = work.indices[i];
199 if (work.remap[target]) {
200 auto& num = output_number[target];
201 if (needs_value) {
202 output_values[target][num] = work.values[i];
203 }
204 if (needs_index) {
205 output_indices[target][num] = s + shift;
206 }
207 ++num;
208 }
209 }
210 }
211 }
212
213public:
    // Extraction: contiguous target block x contiguous non-target block.
    // inflate() refreshes the workspace's sparse buffers before filling.
214 template<typename Index_>
215 void extract(
216 bool row,
217 Index_ target_start,
218 Index_ target_length,
219 Index_ non_target_start,
220 Index_ non_target_length,
221 MyWorkspace& work,
222 const std::vector<value_type*>& output_values,
223 const std::vector<Index_*>& output_indices,
224 Index_* output_number,
225 Index_ shift)
226 const {
227 my_chunk.inflate(work.values, work.indices, work.indptrs);
228 Index_ target_end = target_start + target_length;
229 Index_ non_target_end = non_target_start + non_target_length;
230
        // If the storage orientation matches the request, iterate target
        // elements directly; otherwise go through the transposing path.
231 if (my_chunk.is_csr() == row) {
232 Index_ non_target_chunkdim = get_non_target_chunkdim(row);
233 for (Index_ p = target_start; p < target_end; ++p) {
234 fill_target<true>(p, non_target_start, non_target_end, non_target_chunkdim, work, output_values, output_indices, output_number, shift);
235 }
236 } else {
237 Index_ target_chunkdim = get_target_chunkdim(row);
238 for (Index_ s = non_target_start; s < non_target_end; ++s) {
239 fill_secondary<true>(s, target_start, target_end, target_chunkdim, work, output_values, output_indices, output_number, shift);
240 }
241 }
242 }
243
    // Extraction: contiguous target block x indexed non-target subset.
244 template<typename Index_>
245 void extract(
246 bool row,
247 Index_ target_start,
248 Index_ target_length,
249 const std::vector<Index_>& non_target_indices,
250 MyWorkspace& work,
251 const std::vector<value_type*>& output_values,
252 const std::vector<Index_*>& output_indices,
253 Index_* output_number,
254 Index_ shift)
255 const {
256 my_chunk.inflate(work.values, work.indices, work.indptrs);
257 Index_ target_end = target_start + target_length;
258
259 if (my_chunk.is_csr() == row) {
260 // non_target_indices is guaranteed to be non-empty, see contracts below.
261 auto non_target_start = non_target_indices.front();
262 auto non_target_end = non_target_indices.back() + 1; // need 1 past end.
263 auto non_target_chunkdim = get_non_target_chunkdim(row);
264
265 configure_remap(work, non_target_indices, non_target_chunkdim);
266 for (Index_ p = target_start; p < target_end; ++p) {
267 fill_target<false>(p, non_target_start, non_target_end, non_target_chunkdim, work, output_values, output_indices, output_number, shift);
268 }
269 reset_remap(work, non_target_indices);
270
271 } else {
            // Transposed case: each requested secondary element is scanned
            // against the contiguous target range, so the block path applies
            // and no remap is needed.
272 Index_ target_chunkdim = get_target_chunkdim(row);
273 for (auto s : non_target_indices) {
274 fill_secondary<true>(s, target_start, target_end, target_chunkdim, work, output_values, output_indices, output_number, shift);
275 }
276 }
277 }
278
279public:
    // Extraction: indexed target subset x contiguous non-target block.
280 template<typename Index_>
281 void extract(
282 bool row,
283 const std::vector<Index_>& target_indices,
284 Index_ non_target_start,
285 Index_ non_target_length,
286 MyWorkspace& work,
287 const std::vector<value_type*>& output_values,
288 const std::vector<Index_*>& output_indices,
289 Index_* output_number,
290 Index_ shift)
291 const {
292 my_chunk.inflate(work.values, work.indices, work.indptrs);
293 Index_ non_target_end = non_target_start + non_target_length;
294
295 if (my_chunk.is_csr() == row) {
296 Index_ non_target_chunkdim = get_non_target_chunkdim(row);
297 for (auto p : target_indices) {
298 fill_target<true>(p, non_target_start, non_target_end, non_target_chunkdim, work, output_values, output_indices, output_number, shift);
299 }
300
301 } else {
302 // target_indices is guaranteed to be non-empty, see contracts below.
303 auto target_start = target_indices.front();
304 auto target_end = target_indices.back() + 1; // need 1 past end.
305 auto target_chunkdim = get_target_chunkdim(row);
306
307 configure_remap(work, target_indices, target_chunkdim);
308 for (Index_ s = non_target_start; s < non_target_end; ++s) {
309 fill_secondary<false>(s, target_start, target_end, target_chunkdim, work, output_values, output_indices, output_number, shift);
310 }
311 reset_remap(work, target_indices);
312 }
313 }
314
    // Extraction: indexed target subset x indexed non-target subset.
315 template<typename Index_>
316 void extract(
317 bool row,
318 const std::vector<Index_>& target_indices,
319 const std::vector<Index_>& non_target_indices,
320 MyWorkspace& work,
321 const std::vector<value_type*>& output_values,
322 const std::vector<Index_*>& output_indices,
323 Index_* output_number,
324 Index_ shift)
325 const {
326 my_chunk.inflate(work.values, work.indices, work.indptrs);
327
328 if (my_chunk.is_csr() == row) {
329 // non_target_indices is guaranteed to be non-empty, see contracts below.
330 auto non_target_start = non_target_indices.front();
331 auto non_target_end = non_target_indices.back() + 1; // need 1 past end.
332 auto non_target_chunkdim = get_non_target_chunkdim(row);
333
334 configure_remap(work, non_target_indices, non_target_chunkdim);
335 for (auto p : target_indices) {
336 fill_target<false>(p, non_target_start, non_target_end, non_target_chunkdim, work, output_values, output_indices, output_number, shift);
337 }
338 reset_remap(work, non_target_indices);
339
340 } else {
341 // target_indices is guaranteed to be non-empty, see contracts below.
342 auto target_start = target_indices.front();
343 auto target_end = target_indices.back() + 1; // need 1 past end.
344 auto target_chunkdim = get_target_chunkdim(row);
345
346 configure_remap(work, target_indices, target_chunkdim);
347 for (auto s : non_target_indices) {
348 fill_secondary<false>(s, target_start, target_end, target_chunkdim, work, output_values, output_indices, output_number, shift);
349 }
350 reset_remap(work, target_indices);
351 }
352 }
353};
354
// Simple in-memory CSR blob satisfying the interface that
// MockSparseChunk_internal::Core expects from its Blob_ template argument:
// nrow()/ncol(), is_csr(), and inflate() into caller-supplied buffers.
class MockBlob {
public:
    typedef int index_type;
    typedef double value_type;

    // The mock always stores its contents in compressed-sparse-row form.
    bool is_csr() const {
        return true;
    }

private:
    // Zero-initialized so that a default-constructed MockBlob reports 0x0
    // dimensions; previously these were left indeterminate, so calling
    // nrow()/ncol() on a default-constructed instance was undefined behavior.
    int my_nrow = 0, my_ncol = 0;
    std::vector<double> my_values;
    std::vector<int> my_indices;
    std::vector<size_t> my_pointers;

public:
    MockBlob() = default;

    // Takes ownership of the CSR triplet (values/indices/pointers) by move.
    MockBlob(int nrow, int ncol, std::vector<double> values, std::vector<int> indices, std::vector<size_t> pointers) :
        my_nrow(nrow), my_ncol(ncol), my_values(std::move(values)), my_indices(std::move(indices)), my_pointers(std::move(pointers)) {}

    int nrow() const {
        return my_nrow;
    }

    int ncol() const {
        return my_ncol;
    }

    // Copies the stored CSR components into the supplied buffers, resizing
    // each to fit; assign() replaces the original resize() + std::copy pair.
    void inflate(std::vector<double>& vbuffer, std::vector<int>& ibuffer, std::vector<size_t>& pbuffer) const {
        vbuffer.assign(my_values.begin(), my_values.end());
        ibuffer.assign(my_indices.begin(), my_indices.end());
        pbuffer.assign(my_pointers.begin(), my_pointers.end());
    }
};
393
394}
408public:
413 typedef double value_type;
414
420 struct Workspace {
424 // Hiding this here to avoid giving the impression that we NEED to implement this.
425 MockSparseChunk_internal::Workspace<value_type, int> work;
429 };
430
435 static constexpr bool use_subset = false;
436
437public:
441 // You can construct this however you like, I don't care.
442 MockSimpleSparseChunk() = default;
443 MockSimpleSparseChunk(int nr, int nc, std::vector<double> x, std::vector<int> i, std::vector<size_t> p) :
449private:
450 MockSparseChunk_internal::Core<MockSparseChunk_internal::MockBlob> core;
451
452public:
487 template<typename Index_>
489 bool row,
492 Workspace& work,
493 const std::vector<value_type*>& output_values,
494 const std::vector<Index_*>& output_indices,
497 const {
498 core.template extract<Index_>(
499 row,
500 0,
501 core.get_target_chunkdim(row),
504 work.work,
508 shift
509 );
510 }
511
544 template<typename Index_>
546 bool row,
547 const std::vector<Index_>& non_target_indices,
548 Workspace& work,
549 const std::vector<value_type*>& output_values,
550 const std::vector<Index_*>& output_indices,
553 const {
554 core.template extract<Index_>(
555 row,
556 0,
557 core.get_target_chunkdim(row),
559 work.work,
563 shift
564 );
565 }
566};
567
590template<class Blob_>
595public:
596 typedef typename Blob_::value_type value_type;
597
598 typedef MockSparseChunk_internal::Workspace<value_type, typename Blob_::index_type> Workspace;
599
600 static constexpr bool use_subset = false;
601
602 SimpleSparseChunkWrapper() = default;
603
604 SimpleSparseChunkWrapper(Blob_ core) : my_core(std::move(core)) {}
605
606private:
607 MockSparseChunk_internal::Core<Blob_> my_core;
608
609public:
610 template<typename Index_>
611 void extract(
612 bool row,
615 Workspace& work,
616 const std::vector<value_type*>& output_values,
617 const std::vector<Index_*>& output_indices,
620 const {
621 my_core.template extract<Index_>(
622 row,
623 0,
624 my_core.get_target_chunkdim(row),
627 work,
631 shift
632 );
633 }
634
635 template<typename Index_>
636 void extract(
637 bool row,
638 const std::vector<Index_>& non_target_indices,
639 Workspace& work,
640 const std::vector<value_type*>& output_values,
641 const std::vector<Index_*>& output_indices,
644 const {
645 my_core.template extract<Index_>(
646 row,
647 0,
648 my_core.get_target_chunkdim(row),
650 work,
654 shift
655 );
656 }
660};
661
673public:
678 typedef double value_type;
679
685 struct Workspace {
689 // Hiding this here to avoid giving the impression that we NEED to implement this.
690 MockSparseChunk_internal::Workspace<value_type, int> work;
694 };
695
700 static constexpr bool use_subset = true;
701
705 MockSubsettedSparseChunk() = default;
706 MockSubsettedSparseChunk(int nrow, int ncol, std::vector<double> values, std::vector<int> indices, std::vector<size_t> pointers) :
707 my_core(MockSparseChunk_internal::MockBlob(nrow, ncol, std::move(values), std::move(indices), std::move(pointers))) {}
712private:
713 MockSparseChunk_internal::Core<MockSparseChunk_internal::MockBlob> my_core;
714
715public:
755 template<typename Index_>
757 bool row,
762 Workspace& work,
763 const std::vector<value_type*>& output_values,
764 const std::vector<Index_*>& output_indices,
767 const {
768 my_core.extract(
769 row,
774 work.work,
778 shift
779 );
780 }
781
819 template<typename Index_>
821 bool row,
824 const std::vector<Index_>& non_target_indices,
825 Workspace& work,
826 const std::vector<value_type*>& output_values,
827 const std::vector<Index_*>& output_indices,
830 const {
831 my_core.extract(
832 row,
836 work.work,
840 shift
841 );
842 }
843
844public:
882 template<typename Index_>
884 bool row,
885 const std::vector<Index_>& target_indices,
888 Workspace& work,
889 const std::vector<value_type*>& output_values,
890 const std::vector<Index_*>& output_indices,
893 const {
894 my_core.extract(
895 row,
899 work.work,
903 shift
904 );
905 }
906
942 template<typename Index_>
944 bool row,
945 const std::vector<Index_>& target_indices,
946 const std::vector<Index_>& non_target_indices,
947 Workspace& work,
948 const std::vector<value_type*>& output_values,
949 const std::vector<Index_*>& output_indices,
952 const {
953 my_core.extract(
954 row,
957 work.work,
961 shift
962 );
963 }
964};
965
966}
967
968#endif
Mock a simple sparse chunk for a CustomSparseChunkedMatrix.
Definition mock_sparse_chunk.hpp:407
static constexpr bool use_subset
Definition mock_sparse_chunk.hpp:435
double value_type
Definition mock_sparse_chunk.hpp:413
void extract(bool row, Index_ non_target_start, Index_ non_target_length, Workspace &work, const std::vector< value_type * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift) const
Definition mock_sparse_chunk.hpp:488
void extract(bool row, const std::vector< Index_ > &non_target_indices, Workspace &work, const std::vector< value_type * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift) const
Definition mock_sparse_chunk.hpp:545
Mock a subsettable sparse chunk for a CustomSparseChunkedMatrix.
Definition mock_sparse_chunk.hpp:672
void extract(bool row, const std::vector< Index_ > &target_indices, Index_ non_target_start, Index_ non_target_length, Workspace &work, const std::vector< value_type * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift) const
Definition mock_sparse_chunk.hpp:883
void extract(bool row, Index_ target_start, Index_ target_length, const std::vector< Index_ > &non_target_indices, Workspace &work, const std::vector< value_type * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift) const
Definition mock_sparse_chunk.hpp:820
void extract(bool row, Index_ target_start, Index_ target_length, Index_ non_target_start, Index_ non_target_length, Workspace &work, const std::vector< value_type * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift) const
Definition mock_sparse_chunk.hpp:756
double value_type
Definition mock_sparse_chunk.hpp:678
void extract(bool row, const std::vector< Index_ > &target_indices, const std::vector< Index_ > &non_target_indices, Workspace &work, const std::vector< value_type * > &output_values, const std::vector< Index_ * > &output_indices, Index_ *output_number, Index_ shift) const
Definition mock_sparse_chunk.hpp:943
static constexpr bool use_subset
Definition mock_sparse_chunk.hpp:700
Create a sparse chunk for a CustomSparseChunkedMatrix.
Definition mock_sparse_chunk.hpp:591
Methods to handle chunked tatami matrices.
Definition ChunkDimensionStats.hpp:4
Statistics for regular chunks along a dimension.
Definition ChunkDimensionStats.hpp:35
ChunkDimensionStats()
Definition ChunkDimensionStats.hpp:50
Definition mock_sparse_chunk.hpp:420
Definition mock_sparse_chunk.hpp:685