1#ifndef TATAMI_LAYERED_READ_LAYERED_SPARSE_FROM_MATRIX_MARKET_HPP
2#define TATAMI_LAYERED_READ_LAYERED_SPARSE_FROM_MATRIX_MARKET_HPP
8#include "byteme/byteme.hpp"
9#include "eminem/eminem.hpp"
11#include "sanisizer/sanisizer.hpp"
/**
 * Generic two-pass Matrix Market reader that assembles a layered sparse matrix.
 *
 * NOTE(review): this listing omits a number of source lines (closing braces, some
 * declarations, several lambda bodies). The comments below describe only what is
 * visible; anything inferred is marked as such.
 *
 * @tparam Value_ Value type of the returned `tatami::Matrix`.
 * @tparam Index_ Index type of the returned `tatami::Matrix`; also used for row/column counts and chunk arithmetic.
 * @tparam ColumnIndex_ Third template argument of the per-width `Holder` stores.
 * @tparam Creator_ Callable type of `create`.
 *
 * @param create Called with no arguments, once per pass, to obtain a fresh reader over the input.
 * @param chunk_size Number of columns per chunk (columns are split via `/ chunk_size` and `% chunk_size`).
 * @param num_threads Copied into `eminem::ParserOptions::num_threads`.
 *
 * @return The result of `consolidate_matrices<Value_, Index_>(...)` (arguments not visible in this listing).
 *
 * @throws std::runtime_error with "expected a numeric field in the Matrix Market file"
 * (see the first-pass field dispatch below).
 */
25template<
typename Value_,
typename Index_,
typename ColumnIndex_,
class Creator_>
26std::shared_ptr<tatami::Matrix<Value_, Index_> > read_layered_sparse_from_matrix_market(Creator_ create,
const Index_ chunk_size,
const int num_threads) {
// Matrix dimensions and the column-chunk layout; all four are assigned during the first pass.
27 Index_ NR, NC, nchunks, leftovers;
// One vector of holders per storage width (8-, 16- and 32-bit unsigned values).
29 std::vector<Holder< std::uint8_t, Index_, ColumnIndex_> > store8;
30 std::vector<Holder<std::uint16_t, Index_, ColumnIndex_> > store16;
31 std::vector<Holder<std::uint32_t, Index_, ColumnIndex_> > store32;
// Bookkeeping tables; the code that fills them is not visible in this listing.
// assigned_category / assigned_position are later indexed as [chunk][row].
33 std::vector<std::vector<Index_> > identities8, identities16, identities32;
34 std::vector<std::vector<Index_> > assigned_position;
35 std::vector<std::vector<Category> > assigned_category;
// Parser options shared by both passes.
37 eminem::ParserOptions eopt;
38 eopt.num_threads = num_threads;
// ---- First pass: read the dimensions and collect per-chunk, per-row statistics. ----
// A fresh reader is requested from `create`, wrapped in a per-byte source and handed to the parser.
42 auto reader = create();
43 byteme::PerByteSerial<char, byteme::Reader*> pb(&reader);
44 eminem::Parser<I<
decltype(&pb)>, Index_> parser(&pb, eopt);
46 parser.scan_preamble();
47 NR = parser.get_nrows();
48 NC = parser.get_ncols();
// Ceiling division of the column count by chunk_size, but never fewer than one chunk.
49 leftovers = NC % chunk_size;
50 nchunks = sanisizer::max(1, NC / chunk_size + (leftovers != 0));
// NOTE(review): the declarations of max_per_chunk / num_per_chunk and the bodies of these
// two loops are not visible here; presumably each per-chunk entry is sized to NR rows — confirm.
62 for (
auto& x : max_per_chunk) {
67 for (
auto& x : num_per_chunk) {
// Per-entry callback for the first pass. `r` and `c` are decremented by one before use
// (i.e. treated as 1-based). Tracks the largest category seen and the entry count for
// each (chunk, row) pair.
71 auto handler = [&](
const Index_ r,
const Index_ c,
const Category cat) ->
void {
72 const auto chunk = (c - 1) / chunk_size;
73 auto& maxcat = max_per_chunk[chunk][r - 1];
74 maxcat = std::max(maxcat, cat);
75 ++num_per_chunk[chunk][r - 1];
// Dispatch on the banner's field type: integer entries are parsed as std::uint32_t,
// real/double entries as double; both are mapped to a Category via categorize().
78 const auto& banner = parser.get_banner();
79 if (banner.field == eminem::Field::INTEGER) {
80 parser.template scan_integer<std::uint32_t>([&](
const Index_ r,
const Index_ c,
const std::uint32_t val) ->
void {
81 handler(r, c, categorize(val));
83 }
else if (banner.field == eminem::Field::DOUBLE || banner.field == eminem::Field::REAL) {
84 parser.scan_real([&](
const Index_ r,
const Index_ c,
const double val) ->
void {
85 handler(r, c, categorize(val));
// Presumably the trailing `else` branch for any other field type (the `else` line itself
// is not visible in this listing).
88 throw std::runtime_error(
"expected a numeric field in the Matrix Market file");
// Per-(chunk, row) write cursors into the stores, seeded from get_sparse_ptr().
// NOTE(review): the sizing of each inner vector is not visible here — confirm it is NR.
107 std::vector<std::vector<std::size_t> > output_positions(nchunks);
108 for (I<
decltype(nchunks)> chunk = 0; chunk < nchunks; ++chunk) {
110 for (I<
decltype(NR)> r = 0; r < NR; ++r) {
111 output_positions[chunk][r] = get_sparse_ptr(store8, store16, store32, assigned_category, assigned_position, chunk, r);
// ---- Second pass: re-read the input from a new reader and write the values into the stores. ----
115 auto reader = create();
116 byteme::PerByteSerial<char, byteme::Reader*> pb(&reader);
117 eminem::Parser<I<
decltype(&pb)>, Index_> parser(&pb, eopt);
// Per-entry callback for the second pass; `r` and `c` are taken by value (non-const).
// NOTE(review): unlike the first pass there is no `- 1` on r/c in the visible lines. A source
// line appears to be missing directly after the lambda header, so the conversion to 0-based
// indices may happen there — confirm.
119 auto handler = [&](Index_ r, Index_ c,
const auto val) ->
void {
// Split the column into its chunk and its offset within that chunk.
121 const Index_ chunk = c / chunk_size;
122 const Index_ offset = c % chunk_size;
// Store the value using the category assigned to this (chunk, row); the post-increment
// advances the write cursor for the next entry of the same (chunk, row).
124 fill_sparse_value(store8, store16, store32, assigned_category[chunk][r], chunk, offset, val, output_positions[chunk][r]++);
// Same field dispatch as in the first pass. The callback bodies are not visible here;
// presumably they forward to `handler` — confirm.
127 parser.scan_preamble();
128 const auto& banner = parser.get_banner();
129 if (banner.field == eminem::Field::INTEGER) {
130 parser.template scan_integer<std::uint32_t>([&](
const Index_ r,
const Index_ c,
const std::uint32_t val) ->
void {
133 }
else if (banner.field == eminem::Field::DOUBLE || banner.field == eminem::Field::REAL) {
134 parser.scan_real([&](
const Index_ r,
const Index_ c,
const double val) ->
void {
// Generic helper taking one of the stores: within each [ptr[r-1], ptr[r]) range of a
// holder, reorders the (index, value) pairs so that the indices are ascending.
// Its call sites are not visible in this listing.
140 auto sorter = [&](
auto& store) ->
void {
// Scratch buffer of (index, value) pairs; `I<...>` is an alias defined elsewhere
// (presumably strips references/cv-qualifiers from the decltype — confirm).
141 std::vector<std::pair<I<
decltype(store[0].index[0])>, I<
decltype(store[0].value[0])>> > buffer;
142 buffer.reserve(chunk_size);
144 for (
auto& st : store) {
145 const auto num_ptr = st.ptr.size();
// Start at 1 so that ptr[r - 1] and ptr[r] delimit consecutive ranges.
146 for (I<
decltype(num_ptr)> r = 1; r < num_ptr; ++r) {
147 const auto start = st.ptr[r - 1], end = st.ptr[r];
// Skip the copy/sort/write-back when the range's indices are already in order.
149 if (!std::is_sorted(st.index.begin() + start, st.index.begin() + end)) {
// NOTE(review): no buffer.clear() is visible before this refill; a source line is
// missing from the listing just above, so it may live there — confirm.
151 for (
auto i = start; i < end; ++i) {
152 buffer.emplace_back(st.index[i], st.value[i]);
// std::pair ordering compares the index first, so this sorts by index.
155 std::sort(buffer.begin(), buffer.end());
// Write the sorted pairs back over the same range.
156 auto bIt = buffer.begin();
157 for (
auto i = start; i < end; ++i, ++bIt) {
158 st.index[i] = bIt->first;
159 st.value[i] = bIt->second;
// Combine the per-width stores into the final matrix (call arguments not visible in this listing).
171 return consolidate_matrices<Value_, Index_>(
/**
 * Text-file entry point. The signature line is not visible in this listing; the
 * cross-reference for this source position gives it as
 * `read_layered_sparse_from_matrix_market_text_file(const char* filepath, const ReadLayeredSparseFromMatrixMarketOptions& options)`.
 *
 * Template defaults: `Value_ = double`, `Index_ = int`, `ColumnIndex_ = std::uint16_t`
 * (the last one is split across two lines in this listing).
 */
221template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Delegates to the generic reader with the same three template arguments.
223 return read_layered_sparse_from_matrix_market<Value_, Index_, ColumnIndex_>(
// Reader factory: constructs a byteme::RawFileReader on `filepath`; its options object is
// built inside the nested lambda (only the first line of that lambda is visible here).
225 return byteme::RawFileReader(filepath, [&]{
226 byteme::RawFileReaderOptions opt;
// The chunk size from `options` is passed through check_chunk_size<Index_, ColumnIndex_>
// before being handed to the generic reader.
231 check_chunk_size<Index_, ColumnIndex_>(options.
chunk_size),
/**
 * Overload fragment that packs loose `chunk_size` / `buffer_size` arguments into a
 * `ReadLayeredSparseFromMatrixMarketOptions` object.
 *
 * NOTE(review): the signature and the forwarding call are not visible in this listing;
 * presumably this is a convenience overload of the text-file reader — confirm.
 */
240template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Fields not assigned here keep whatever defaults the options type provides.
243 ReadLayeredSparseFromMatrixMarketOptions opt;
244 opt.chunk_size = chunk_size;
245 opt.buffer_size = buffer_size;
253#if __has_include("zlib.h")
/**
 * File entry point built on `byteme::SomeFileReader`. The signature line is not visible in
 * this listing; the cross-reference for this source position gives it as
 * `read_layered_sparse_from_matrix_market_some_file(const char* filepath, const ReadLayeredSparseFromMatrixMarketOptions& options)`.
 *
 * Template defaults: `Value_ = double`, `Index_ = int`, `ColumnIndex_ = std::uint16_t`.
 * This definition sits after an `#if __has_include("zlib.h")` line in the listing.
 */
269template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Delegates to the generic reader with the same three template arguments.
271 return read_layered_sparse_from_matrix_market<Value_, Index_, ColumnIndex_>(
// Reader factory: constructs a byteme::SomeFileReader on `filepath`; its options object is
// built inside the nested lambda (only the first line of that lambda is visible here).
273 return byteme::SomeFileReader(filepath, [&]{
274 byteme::SomeFileReaderOptions opt;
// The chunk size from `options` is validated via check_chunk_size<Index_, ColumnIndex_>.
279 check_chunk_size<Index_, ColumnIndex_>(options.
chunk_size),
/**
 * File entry point built on `byteme::GzipFileReader`. The signature line is not visible in
 * this listing; the cross-reference for this source position gives it as
 * `read_layered_sparse_from_matrix_market_gzip_file(const char* filepath, const ReadLayeredSparseFromMatrixMarketOptions& options)`.
 *
 * Template defaults: `Value_ = double`, `Index_ = int`, `ColumnIndex_ = std::uint16_t`.
 */
298template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Delegates to the generic reader with the same three template arguments.
300 return read_layered_sparse_from_matrix_market<Value_, Index_, ColumnIndex_>(
// Reader factory: constructs a byteme::GzipFileReader on `filepath`; its options object is
// built inside the nested lambda (only the first line of that lambda is visible here).
302 return byteme::GzipFileReader(filepath, [&]{
303 byteme::GzipFileReaderOptions opt;
// The chunk size from `options` is validated via check_chunk_size<Index_, ColumnIndex_>.
308 check_chunk_size<Index_, ColumnIndex_>(options.
chunk_size),
/**
 * Overload fragment that packs loose `chunk_size` / `buffer_size` arguments into a
 * `ReadLayeredSparseFromMatrixMarketOptions` object.
 *
 * NOTE(review): the signature and the forwarding call are not visible in this listing;
 * presumably this is a convenience overload of one of the file readers above
 * (likely the SomeFileReader variant) — confirm.
 */
317template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Fields not assigned here keep whatever defaults the options type provides.
320 ReadLayeredSparseFromMatrixMarketOptions opt;
321 opt.chunk_size = chunk_size;
322 opt.buffer_size = buffer_size;
/**
 * Overload fragment that packs loose `chunk_size` / `buffer_size` arguments into a
 * `ReadLayeredSparseFromMatrixMarketOptions` object.
 *
 * NOTE(review): the signature and the forwarding call are not visible in this listing;
 * presumably this is a convenience overload of one of the file readers above
 * (likely the GzipFileReader variant) — confirm.
 */
327template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Fields not assigned here keep whatever defaults the options type provides.
330 ReadLayeredSparseFromMatrixMarketOptions opt;
331 opt.chunk_size = chunk_size;
332 opt.buffer_size = buffer_size;
/**
 * In-memory text entry point. Only part of the signature is visible in this listing; the
 * cross-reference for this source position gives it as
 * `read_layered_sparse_from_matrix_market_text_buffer(const unsigned char* contents, std::size_t length, const ReadLayeredSparseFromMatrixMarketOptions& options)`.
 *
 * Template defaults: `Value_ = double`, `Index_ = int`, `ColumnIndex_ = std::uint16_t`.
 */
358template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Pointer to the bytes to parse; paired with `length` in the reader construction below.
360 const unsigned char* contents,
// Delegates to the generic reader with the same three template arguments.
364 return read_layered_sparse_from_matrix_market<Value_, Index_, ColumnIndex_>(
// Reader factory: a byteme::RawBufferReader over [contents, contents + length); no reader
// options object is involved in this variant.
366 return byteme::RawBufferReader(contents, length);
// The chunk size from `options` is validated via check_chunk_size<Index_, ColumnIndex_>.
368 check_chunk_size<Index_, ColumnIndex_>(options.
chunk_size),
/**
 * Overload fragment that packs a loose `chunk_size` argument into a
 * `ReadLayeredSparseFromMatrixMarketOptions` object. Only `chunk_size` is assigned in the
 * visible lines.
 *
 * NOTE(review): the signature and the forwarding call are not visible in this listing;
 * presumably this is a convenience overload of the text-buffer reader — confirm.
 */
377template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
380 ReadLayeredSparseFromMatrixMarketOptions opt;
381 opt.chunk_size = chunk_size;
390#if __has_include("zlib.h")
/**
 * In-memory entry point built on `byteme::SomeBufferReader`. Only part of the signature is
 * visible in this listing; the cross-reference for this source position gives it as
 * `read_layered_sparse_from_matrix_market_some_buffer(const unsigned char* contents, std::size_t length, const ReadLayeredSparseFromMatrixMarketOptions& options)`.
 *
 * Template defaults: `Value_ = double`, `Index_ = int`, `ColumnIndex_ = std::uint16_t`.
 * This definition sits after an `#if __has_include("zlib.h")` line in the listing.
 */
407template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Pointer to the bytes to parse; paired with `length` in the reader construction below.
409 const unsigned char* contents,
// Delegates to the generic reader with the same three template arguments.
413 return read_layered_sparse_from_matrix_market<Value_, Index_, ColumnIndex_>(
// Reader factory: constructs a byteme::SomeBufferReader over the buffer; its options object
// is built inside the nested lambda (only the first line of that lambda is visible here).
415 return byteme::SomeBufferReader(contents, length, [&]{
416 byteme::SomeBufferReaderOptions opt;
// The chunk size from `options` is validated via check_chunk_size<Index_, ColumnIndex_>.
421 check_chunk_size<Index_, ColumnIndex_>(options.
chunk_size),
/**
 * In-memory entry point built on `byteme::ZlibBufferReader`. Only part of the signature is
 * visible in this listing; the cross-reference for this source position gives it as
 * `read_layered_sparse_from_matrix_market_zlib_buffer(const unsigned char* contents, std::size_t length, const ReadLayeredSparseFromMatrixMarketOptions& options)`.
 *
 * Template defaults: `Value_ = double`, `Index_ = int`, `ColumnIndex_ = std::uint16_t`.
 */
441template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
// Pointer to the bytes to parse; paired with `length` in the reader construction below.
443 const unsigned char* contents,
// Delegates to the generic reader with the same three template arguments.
447 return read_layered_sparse_from_matrix_market<Value_, Index_, ColumnIndex_>(
// Reader factory: constructs a byteme::ZlibBufferReader over the buffer; its options object
// is built inside the nested lambda (only the first line of that lambda is visible here).
449 return byteme::ZlibBufferReader(contents, length, [&]{
450 byteme::ZlibBufferReaderOptions opt;
// The chunk size from `options` is validated via check_chunk_size<Index_, ColumnIndex_>.
455 check_chunk_size<Index_, ColumnIndex_>(options.
chunk_size),
/**
 * Overload fragment taking the buffer plus loose `chunk_size` / `buffer_size` arguments
 * (both defaulting to 65536) and packing them into a
 * `ReadLayeredSparseFromMatrixMarketOptions` object.
 *
 * NOTE(review): the function name, the remaining parameters and the forwarding call are
 * not visible in this listing; presumably this is a convenience overload of one of the
 * buffer readers above (likely the SomeBufferReader variant) — confirm.
 */
464template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
466 const unsigned char* contents,
468 Index_ chunk_size = 65536,
469 std::size_t buffer_size = 65536)
// Fields not assigned here keep whatever defaults the options type provides.
472 ReadLayeredSparseFromMatrixMarketOptions opt;
473 opt.chunk_size = chunk_size;
474 opt.buffer_size = buffer_size;
/**
 * Overload fragment taking the buffer plus loose `chunk_size` / `buffer_size` arguments
 * (both defaulting to 65536) and packing them into a
 * `ReadLayeredSparseFromMatrixMarketOptions` object.
 *
 * NOTE(review): the function name, the remaining parameters and the forwarding call are
 * not visible in this listing; presumably this is a convenience overload of one of the
 * buffer readers above (likely the ZlibBufferReader variant) — confirm.
 */
479template<
typename Value_ =
double,
typename Index_ =
int,
typename ColumnIndex_ = std::u
int16_t>
481 const unsigned char* contents,
483 Index_ chunk_size = 65536,
484 std::size_t buffer_size = 65536)
// Fields not assigned here keep whatever defaults the options type provides.
487 ReadLayeredSparseFromMatrixMarketOptions opt;
488 opt.chunk_size = chunk_size;
489 opt.buffer_size = buffer_size;
Create layered sparse matrices for tatami.
Definition convert_to_layered_sparse.hpp:20
std::shared_ptr< tatami::Matrix< Value_, Index_ > > read_layered_sparse_from_matrix_market_some_buffer(const unsigned char *contents, std::size_t length, const ReadLayeredSparseFromMatrixMarketOptions &options)
Definition read_layered_sparse_from_matrix_market.hpp:408
std::shared_ptr< tatami::Matrix< Value_, Index_ > > read_layered_sparse_from_matrix_market_some_file(const char *filepath, const ReadLayeredSparseFromMatrixMarketOptions &options)
Definition read_layered_sparse_from_matrix_market.hpp:270
std::shared_ptr< tatami::Matrix< Value_, Index_ > > read_layered_sparse_from_matrix_market_gzip_file(const char *filepath, const ReadLayeredSparseFromMatrixMarketOptions &options)
Definition read_layered_sparse_from_matrix_market.hpp:299
std::shared_ptr< tatami::Matrix< Value_, Index_ > > read_layered_sparse_from_matrix_market_text_buffer(const unsigned char *contents, std::size_t length, const ReadLayeredSparseFromMatrixMarketOptions &options)
Definition read_layered_sparse_from_matrix_market.hpp:359
std::shared_ptr< tatami::Matrix< Value_, Index_ > > read_layered_sparse_from_matrix_market_zlib_buffer(const unsigned char *contents, std::size_t length, const ReadLayeredSparseFromMatrixMarketOptions &options)
Definition read_layered_sparse_from_matrix_market.hpp:442
std::shared_ptr< tatami::Matrix< Value_, Index_ > > read_layered_sparse_from_matrix_market_text_file(const char *filepath, const ReadLayeredSparseFromMatrixMarketOptions &options)
Definition read_layered_sparse_from_matrix_market.hpp:222
void resize_container_to_Index_size(Container_ &container, const Index_ x, Args_ &&... args)
Container_ create_container_of_Index_size(const Index_ x, Args_ &&... args)
Definition read_layered_sparse_from_matrix_market.hpp:190
std::size_t buffer_size
Definition read_layered_sparse_from_matrix_market.hpp:199
std::size_t chunk_size
Definition read_layered_sparse_from_matrix_market.hpp:194
int num_threads
Definition read_layered_sparse_from_matrix_market.hpp:204