tatami_mtx
Matrix Market to tatami matrices
Loading...
Searching...
No Matches
load_matrix.hpp
Go to the documentation of this file.
1#ifndef TATAMI_MTX_SIMPLE_HPP
2#define TATAMI_MTX_SIMPLE_HPP
3
4#include "tatami/tatami.hpp"
5#include "eminem/eminem.hpp"
6#include "byteme/byteme.hpp"
7
13namespace tatami_mtx {
14
/**
 * @brief Enable automatic type determination.
 *
 * Empty tag type. When it is supplied as the `StoredValue_` or `StoredIndex_` template argument of the loaders in this file,
 * the loader picks a concrete storage type itself instead of using a caller-specified one.
 */
struct Automatic {};
21
/**
 * @brief Options for load_matrix() and friends.
 */
struct Options {
    /**
     * Layout selector: `load_matrix()` instantiates the internal loader with `row_ = true` when this is set
     * and with `row_ = false` otherwise.
     */
    bool row{true};

    /**
     * Buffer size handed to the byteme readers that accept one
     * (the file readers, plus the zlib and "some" buffer readers).
     */
    size_t buffer_size{65536};

    /**
     * Chooses the `parallel_` template argument of the `eminem::Parser` created by `load_matrix()`.
     */
    bool parallel{false};

    /**
     * Forwarded as the third constructor argument of `byteme::ZlibBufferReader` in `load_matrix_from_zlib_buffer()`.
     * NOTE(review): the meaning of each value is defined by byteme - confirm against its documentation.
     */
    int compression{3};
};
51
55namespace internal {
56
57template<bool row_, typename Value_, typename Index_, typename StoredValue_, typename StoredIndex_, typename TempIndex_, typename Parser_>
58std::shared_ptr<tatami::Matrix<Value_, Index_> > load_sparse_matrix_basic(Parser_& parser, eminem::Field field, size_t NR, size_t NC, size_t NL) {
59 std::vector<typename std::conditional<row_, TempIndex_, StoredIndex_>::type> rows;
60 std::vector<typename std::conditional<!row_, TempIndex_, StoredIndex_>::type> columns;
61 rows.reserve(NL), columns.reserve(NL);
62 std::vector<StoredValue_> values;
63 values.reserve(NL);
64
65 if (field == eminem::Field::INTEGER) {
66 parser.scan_integer([&](size_t r, size_t c, int v) -> void {
67 values.push_back(v);
68 rows.push_back(r - 1);
69 columns.push_back(c - 1);
70 });
71
72 } else if (field == eminem::Field::REAL || field == eminem::Field::DOUBLE) {
73 parser.scan_real([&](size_t r, size_t c, double v) -> void {
74 values.push_back(v);
75 rows.push_back(r - 1);
76 columns.push_back(c - 1);
77 });
78
79 } else {
80 throw std::runtime_error("unsupported Matrix Market field type");
81 }
82
83 auto ptr = tatami::compress_sparse_triplets<row_>(NR, NC, values, rows, columns);
84 std::vector<StoredIndex_> indices;
85 if constexpr(row_) {
86 indices.swap(columns);
87 } else {
88 indices.swap(rows);
89 }
90
91 return std::shared_ptr<tatami::Matrix<Value_, Index_> >(
93 NR, NC, std::move(values), std::move(indices), std::move(ptr), row_, false
94 )
95 );
96}
97
98template<bool row_, typename Value_, typename Index_, typename StoredValue_, typename StoredIndex_, typename TempIndex_, typename Parser_>
99std::shared_ptr<tatami::Matrix<Value_, Index_> > load_sparse_matrix_data(Parser_& parser, eminem::Field field, size_t NR, size_t NC, size_t NL) {
100 if constexpr(std::is_same<StoredValue_, Automatic>::value) {
101 if (field == eminem::Field::REAL || field == eminem::Field::DOUBLE) {
102 return load_sparse_matrix_basic<row_, Value_, Index_, double, StoredIndex_, TempIndex_>(parser, field, NR, NC, NL);
103 }
104 if (field != eminem::Field::INTEGER) {
105 throw std::runtime_error("unsupported Matrix Market field type");
106 }
107 return load_sparse_matrix_basic<row_, Value_, Index_, int, StoredIndex_, TempIndex_>(parser, field, NR, NC, NL);
108 } else {
109 return load_sparse_matrix_basic<row_, Value_, Index_, StoredValue_, StoredIndex_, TempIndex_>(parser, field, NR, NC, NL);
110 }
111}
112
113template<bool row_, typename Value_, typename Index_, typename StoredValue_, typename StoredIndex_, typename TempIndex_, typename Parser_>
114std::shared_ptr<tatami::Matrix<Value_, Index_> > load_sparse_matrix_index(Parser_& parser, eminem::Field field, size_t NR, size_t NC, size_t NL) {
115 if constexpr(std::is_same<StoredIndex_, Automatic>::value) {
116 // Automatically choosing a smaller integer type, if it fits.
117 constexpr size_t limit8 = std::numeric_limits<uint8_t>::max(), limit16 = std::numeric_limits<uint16_t>::max();
118 size_t target = (row_ ? NC : NR);
119
120 if (target <= limit8) {
121 return load_sparse_matrix_data<row_, Value_, Index_, StoredValue_, uint8_t, TempIndex_>(parser, field, NR, NC, NL);
122 } else if (target <= limit16) {
123 return load_sparse_matrix_data<row_, Value_, Index_, StoredValue_, uint16_t, TempIndex_>(parser, field, NR, NC, NL);
124 } else {
125 return load_sparse_matrix_data<row_, Value_, Index_, StoredValue_, uint32_t, TempIndex_>(parser, field, NR, NC, NL);
126 }
127
128 } else {
129 return load_sparse_matrix_data<row_, Value_, Index_, StoredValue_, StoredIndex_, TempIndex_>(parser, field, NR, NC, NL);
130 }
131}
132
133template<bool row_, typename Value_, typename Index_, typename StoredValue_, typename Parser_>
134std::shared_ptr<tatami::Matrix<Value_, Index_> > load_dense_matrix_basic(Parser_& parser, eminem::Field field, size_t NR, size_t NC) {
135 std::vector<StoredValue_> values;
136 if constexpr(row_) {
137 values.resize(NR * NC);
138 } else {
139 values.reserve(NR * NC);
140 }
141
142 if (field == eminem::Field::INTEGER) {
143 parser.scan_integer([&](size_t r, size_t c, int v) -> void {
144 if constexpr(row_) {
145 values[(r - 1) * NC + (c - 1)] = v;
146 } else {
147 values.push_back(v); // Matrix Market ARRAY format is already column-major
148 }
149 });
150
151 } else if (field == eminem::Field::REAL || field == eminem::Field::DOUBLE) {
152 parser.scan_real([&](size_t r, size_t c, double v) -> void {
153 if constexpr(row_) {
154 values[(r - 1) * NC + (c - 1)] = v;
155 } else {
156 values.push_back(v);
157 }
158 });
159
160 } else {
161 throw std::runtime_error("unsupported Matrix Market field type");
162 }
163
164 return std::shared_ptr<tatami::Matrix<Value_, Index_> >(
165 new tatami::DenseMatrix<Value_, Index_, decltype(values)>(NR, NC, std::move(values), row_)
166 );
167}
168
169template<bool row_, bool parallel_, typename Value_, typename Index_, typename StoredValue_, typename StoredIndex_>
170std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix(byteme::Reader& reader) {
171 eminem::Parser<parallel_> parser(&reader);
172 parser.scan_preamble();
173
174 const auto& banner = parser.get_banner();
175 auto field = banner.field;
176 auto format = banner.format;
177 size_t NR = parser.get_nrows(), NC = parser.get_ncols(), NL = parser.get_nlines();
178
179 if (format == eminem::Format::COORDINATE) {
180 // Automatically choosing a smaller integer type for the temporary index.
181 constexpr size_t limit8 = std::numeric_limits<uint8_t>::max(), limit16 = std::numeric_limits<uint16_t>::max();
182 auto primary = (row_ ? NR : NC);
183
184 if (primary <= limit8) {
185 return load_sparse_matrix_index<row_, Value_, Index_, StoredValue_, StoredIndex_, uint8_t>(parser, field, NR, NC, NL);
186 } else if (primary <= limit16) {
187 return load_sparse_matrix_index<row_, Value_, Index_, StoredValue_, StoredIndex_, uint16_t>(parser, field, NR, NC, NL);
188 } else {
189 return load_sparse_matrix_index<row_, Value_, Index_, StoredValue_, StoredIndex_, uint32_t>(parser, field, NR, NC, NL);
190 }
191
192 } else {
193 if constexpr(std::is_same<StoredValue_, Automatic>::value) {
194 if (field == eminem::Field::REAL || field == eminem::Field::DOUBLE) {
195 return load_dense_matrix_basic<row_, Value_, Index_, double>(parser, field, NR, NC);
196 }
197 if (field != eminem::Field::INTEGER) {
198 throw std::runtime_error("unsupported Matrix Market field type");
199 }
200 return load_dense_matrix_basic<row_, Value_, Index_, int>(parser, field, NR, NC);
201
202 } else {
203 return load_dense_matrix_basic<row_, Value_, Index_, StoredValue_>(parser, field, NR, NC);
204 }
205 }
206}
207
208}
230template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
231std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix(byteme::Reader& reader, const Options& options) {
232 if (options.row) {
233 if (options.parallel) {
234 return internal::load_matrix<true, true, Value_, Index_, StoredValue_, StoredIndex_>(reader);
235 } else {
236 return internal::load_matrix<true, false, Value_, Index_, StoredValue_, StoredIndex_>(reader);
237 }
238 } else {
239 if (options.parallel) {
240 return internal::load_matrix<false, true, Value_, Index_, StoredValue_, StoredIndex_>(reader);
241 } else {
242 return internal::load_matrix<false, false, Value_, Index_, StoredValue_, StoredIndex_>(reader);
243 }
244 }
245}
246
261template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
262std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix_from_text_file(const char* filepath, const Options& options) {
263 byteme::RawFileReader reader(filepath, options.buffer_size);
264 return load_matrix<Value_, Index_, StoredValue_, StoredIndex_>(reader, options);
265}
266
267#if __has_include("zlib.h")
268
282template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
283std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix_from_gzip_file(const char* filepath, const Options& options) {
284 byteme::GzipFileReader reader(filepath, options.buffer_size);
285 return load_matrix<Value_, Index_, StoredValue_, StoredIndex_>(reader, options);
286}
287
301template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
302std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix_from_some_file(const char* filepath, const Options& options) {
303 byteme::SomeFileReader reader(filepath, options.buffer_size);
304 return load_matrix<Value_, Index_, StoredValue_, StoredIndex_>(reader, options);
305}
306
307#endif
308
323template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
324std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix_from_text_buffer(const unsigned char* buffer, size_t n, const Options& options) {
325 byteme::RawBufferReader reader(buffer, n);
326 return load_matrix<Value_, Index_, StoredValue_, StoredIndex_>(reader, options);
327}
328
329#if __has_include("zlib.h")
330
345template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
346std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix_from_zlib_buffer(const unsigned char* buffer, size_t n, const Options& options) {
347 byteme::ZlibBufferReader reader(buffer, n, options.compression, options.buffer_size);
348 return load_matrix<Value_, Index_, StoredValue_, StoredIndex_>(reader, options);
349}
350
365template<typename Value_, typename Index_, typename StoredValue_ = Automatic, typename StoredIndex_ = Automatic>
366std::shared_ptr<tatami::Matrix<Value_, Index_> > load_matrix_from_some_buffer(const unsigned char* buffer, size_t n, const Options& options) {
367 byteme::SomeBufferReader reader(buffer, n, options.buffer_size);
368 return load_matrix<Value_, Index_, StoredValue_, StoredIndex_>(reader, options);
369}
370
371#endif
372
373}
374
375#endif
Read tatami matrices from Matrix Market files.
Definition load_matrix.hpp:13
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix(byteme::Reader &reader, const Options &options)
Definition load_matrix.hpp:231
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix_from_some_buffer(const unsigned char *buffer, size_t n, const Options &options)
Definition load_matrix.hpp:366
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix_from_some_file(const char *filepath, const Options &options)
Definition load_matrix.hpp:302
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix_from_text_buffer(const unsigned char *buffer, size_t n, const Options &options)
Definition load_matrix.hpp:324
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix_from_zlib_buffer(const unsigned char *buffer, size_t n, const Options &options)
Definition load_matrix.hpp:346
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix_from_gzip_file(const char *filepath, const Options &options)
Definition load_matrix.hpp:283
std::shared_ptr< tatami::Matrix< Value_, Index_ > > load_matrix_from_text_file(const char *filepath, const Options &options)
Definition load_matrix.hpp:262
std::vector< size_t > compress_sparse_triplets(size_t nrow, size_t ncol, Values_ &values, RowIndices_ &row_indices, ColumnIndices_ &column_indices, bool csr)
Enable automatic type determination.
Definition load_matrix.hpp:20
Options for load_matrix() and friends.
Definition load_matrix.hpp:25
size_t buffer_size
Definition load_matrix.hpp:37
int compression
Definition load_matrix.hpp:49
bool row
Definition load_matrix.hpp:30
bool parallel
Definition load_matrix.hpp:43