eminem
Parse Matrix Market files in C++
Loading...
Searching...
No Matches
Parser.hpp
Go to the documentation of this file.
1#ifndef EMINEM_PARSER_HPP
2#define EMINEM_PARSER_HPP
3
4#include <vector>
5#include <string>
6#include <complex>
7#include <type_traits>
8#include <stdexcept>
9#include <memory>
10#include <thread>
11#include <mutex>
12#include <condition_variable>
13#include <limits>
14
16#include "byteme/PerByte.hpp"
17#include "sanisizer/sanisizer.hpp"
18
19#include "utils.hpp"
20
27namespace eminem {
28
/** Unsigned integer type used in this file for indices, dimensions and line counters. */
using Index = unsigned long long;
34
42 int num_threads = 1;
43
49 std::size_t block_size = sanisizer::cap<std::size_t>(65536);
50};
51
/**
 * Pool of worker threads, each of which owns one `Workspace_` instance.
 *
 * The thread that calls `run()` hands jobs to the workers by cycling through them in a fixed order,
 * and merges each worker's previous result just before handing it the next job.
 * The first exception thrown by any job is stored and rethrown from `run()`.
 *
 * @tparam Workspace_ Default-constructible type holding the per-worker job state.
 */
55template<typename Workspace_>
56class ThreadPool {
57public:
 /**
  * Start `num_threads` workers and block until each of them has published its helper in `my_helpers`.
  *
  * @tparam RunJob_ Callable invoked by a worker as `run_job(work)` on its own `Workspace_`; its return value is discarded.
  * @param run_job Job body, copied into every worker's lambda.
  * @param num_threads Number of workers to start; also the length of `my_helpers`.
  *
  * NOTE(review): `run()` dereferences `my_helpers[0]` unconditionally, so `num_threads` presumably has to be positive - confirm with callers.
  */
58 template<typename RunJob_>
59 ThreadPool(RunJob_ run_job, int num_threads) :
60 my_helpers(sanisizer::cast<decltype(my_helpers.size())>(num_threads))
61 {
 // These three objects live on the constructor's stack; the workers reference them only
 // while registering, and the constructor does not return until all have registered.
62 std::mutex init_mut;
63 std::condition_variable init_cv;
64 int num_initialized = 0;
65
66 my_threads.reserve(num_threads);
67 for (int t = 0; t < num_threads; ++t) {
68 // Copy lambda as it will be gone once this constructor finishes.
69 my_threads.emplace_back([run_job,this,&init_mut,&init_cv,&num_initialized](int thread) -> void {
70 Helper env; // allocating this locally within each thread to reduce the risk of false sharing.
71 my_helpers[thread] = &env;
72 {
73 std::lock_guard lck(init_mut);
74 ++num_initialized;
75 init_cv.notify_one();
76 }
77
 // Worker loop: wait for input, run the job, flag the output, repeat until told to terminate.
78 while (1) {
79 std::unique_lock lck(env.mut);
80 env.cv.wait(lck, [&]() -> bool { return env.input_ready; });
81 if (env.terminated) {
82 return;
83 }
84 env.input_ready = false;
85
 // 'lck' is still held here, so 'env.mut' stays locked for the duration of the job.
86 try {
87 run_job(env.work);
88 } catch (...) {
 // Only the first exception across all workers is kept.
89 std::lock_guard elck(my_error_mut);
90 if (!my_error) {
91 my_error = std::current_exception();
92 }
93 }
94
 // Set even when the job threw; run() checks 'my_error' before looking at 'has_output'.
95 env.has_output = true;
96 env.available = true;
97 env.cv.notify_one();
98 }
99 }, t);
100 }
101
102 // Only returning once all threads (and their specific mutexes) are initialized.
103 {
104 std::unique_lock ilck(init_mut);
105 init_cv.wait(ilck, [&]() -> bool { return num_initialized == num_threads; });
106 }
107 }
108
 /**
  * Ask every worker to terminate and join all threads.
  * Setting `input_ready` alongside `terminated` is what wakes a worker that is waiting for input.
  */
109 ~ThreadPool() {
110 for (auto envptr : my_helpers) {
111 auto& env = *envptr;
112 {
113 std::lock_guard lck(env.mut);
114 env.terminated = true;
115 env.input_ready = true;
116 }
117 env.cv.notify_one();
118 }
119 for (auto& thread : my_threads) {
120 thread.join();
121 }
122 }
123
124private:
125 std::vector<std::thread> my_threads;
126
 // Per-worker state; 'mut' guards every field and 'cv' is signalled in both directions.
127 struct Helper {
128 std::mutex mut;
129 std::condition_variable cv;
 // Set by run() (or the destructor) to wake the worker; cleared by the worker.
130 bool input_ready = false;
 // Set by the worker once its job is done; cleared by run() when it claims the helper.
131 bool available = true;
 // Set by the worker after a job; cleared by run() after a successful merge.
132 bool has_output = false;
 // Set by the destructor to make the worker leave its loop.
133 bool terminated = false;
134 Workspace_ work;
135 };
 // Pointers to the Helper objects that live on each worker thread's own stack.
136 std::vector<Helper*> my_helpers;
137
 // First exception captured from any worker, guarded by 'my_error_mut'.
138 std::mutex my_error_mut;
139 std::exception_ptr my_error;
140
141public:
 /**
  * Repeatedly create jobs, dispatch them to the workers, and merge their results.
  *
  * @tparam CreateJob_ Callable invoked as `create_job(work)` on the calling thread; returning false means no further jobs follow this one.
  * @tparam MergeJob_ Callable invoked as `merge_job(work)` on the calling thread; returning false requests an early stop.
  * @param create_job Fills a workspace with the next job's input.
  * @param merge_job Consumes the output left in a workspace by a finished job.
  *
  * @return false if `merge_job` returned false, true once every worker has been visited after the last job was created.
  * Rethrows the stored exception if any worker's job threw.
  */
142 template<typename CreateJob_, typename MergeJob_>
143 bool run(CreateJob_ create_job, MergeJob_ merge_job) {
144 auto num_threads = my_threads.size();
145 bool finished = false;
146 decltype(num_threads) thread = 0, finished_count = 0;
147
148 // We submit jobs by cycling through all threads, then we merge their results in order of submission.
149 // This is a less efficient worksharing scheme but it guarantees the same order of merges.
150 while (1) {
151 auto& env = *(my_helpers[thread]);
152 std::unique_lock lck(env.mut);
153 env.cv.wait(lck, [&]() -> bool { return env.available; });
154
 // Errors are checked after every wait, before any merge of this worker's output.
155 {
156 std::lock_guard elck(my_error_mut);
157 if (my_error) {
158 std::rethrow_exception(my_error);
159 }
160 }
161 env.available = false;
162
163 if (env.has_output) {
164 // If the user requests an early quit from the merge job,
165 // there's no point processing the later merge jobs from
166 // other threads, so we just break out at this point.
167 if (!merge_job(env.work)) {
168 return false;
169 }
170 env.has_output = false;
171 }
172
173 if (finished) {
174 // Go through all threads one last time, making sure all results are merged.
175 ++finished_count;
176 if (finished_count == num_threads) {
177 break;
178 }
179 } else {
 // The job that reports "no more input" is still dispatched to the worker.
180 finished = !create_job(env.work);
181 env.input_ready = true;
182 lck.unlock();
183 env.cv.notify_one();
184 }
185
186 ++thread;
187 if (thread == num_threads) {
188 thread = 0;
189 }
190 }
191
192 return true;
193 }
194};
195
/**
 * Fill `buffer` with a block of bytes from `input`, extended so that it ends on a newline.
 *
 * Up to `block_size` bytes are extracted in one call; if the block does not end with a newline,
 * further bytes are appended one at a time until a newline has been stored.
 *
 * @tparam Input_ Type providing `extract(n, ptr)` (returning a pair of count and "more available" flag), `get()` and `advance()`.
 * @param input Source of bytes.
 * @param[out] buffer Overwritten with the extracted bytes.
 * @param block_size Number of bytes to request in the initial extraction.
 *
 * @return true if `input` still has bytes after the block, false if it was exhausted or nothing was extracted.
 */
template<class Input_>
bool fill_to_next_newline(Input_& input, std::vector<char>& buffer, std::size_t block_size) {
    buffer.resize(block_size);
    auto extracted = input.extract(block_size, buffer.data());
    buffer.resize(extracted.first);

    if (buffer.empty() || !extracted.second) {
        return false;
    }

    // Top up the block byte-by-byte until its final character is a newline.
    for (char tail = buffer.back(); tail != '\n'; ) {
        tail = input.get();
        buffer.push_back(tail);
        if (!input.advance()) {
            return false;
        }
    }

    return true;
}
214
/**
 * Count the newline characters in a buffer.
 *
 * @param buffer Bytes to inspect.
 * @return Number of elements equal to '\n'.
 */
inline std::size_t count_newlines(const std::vector<char>& buffer) {
    std::size_t total = 0;
    for (auto it = buffer.begin(); it != buffer.end(); ++it) {
        if (*it == '\n') {
            ++total;
        }
    }
    return total;
}
270template<class Input_>
271class Parser {
272public:
277 Parser(std::unique_ptr<Input_> input, const ParserOptions& options) :
278 my_input(std::move(input)),
279 my_nthreads(options.num_threads),
280 my_block_size(options.block_size)
281 {
282 sanisizer::cast<typename std::vector<char>::size_type>(my_block_size); // checking that there won't be any overflow in fill_to_next_newline().
283 }
284
285private:
286 std::unique_ptr<Input_> my_input;
287 int my_nthreads;
288 std::size_t my_block_size;
289
290 Index my_current_line = 0;
291 MatrixDetails my_details;
292
/**
 * Skip horizontal blanks (space, tab, carriage return) starting at the current byte.
 *
 * @param input Positioned on a valid byte.
 * @return true if `input` now sits on a non-blank byte, false if the input ended while skipping blanks.
 */
template<typename Input2_>
static bool chomp(Input2_& input) {
    for (;;) {
        const char current = input.get();
        const bool is_blank = (current == ' ' || current == '\t' || current == '\r');
        if (!is_blank) {
            return true;
        }
        if (!input.advance()) {
            return false;
        }
    }
}
306
/**
 * Step off the current byte, then skip any horizontal blanks that follow.
 * Intended for use when the current byte is already known to be a blank, saving one comparison.
 *
 * @param input Positioned on a valid byte.
 * @return true if `input` ends up on a non-blank byte, false if the input ended first.
 */
template<typename Input2_>
static bool advance_and_chomp(Input2_& input) {
    return input.advance() && chomp(input);
}
316
317 template<typename Input2_>
318 static bool skip_lines(Input2_& input, Index& current_line) {
319 // Skip comments and empty lines.
320 while (1) {
321 char x = input.get();
322 if (x == '%') {
323 do {
324 if (!(input.advance())) {
325 return false;
326 }
327 } while (input.get() != '\n');
328 } else if (x != '\n') {
329 break;
330 }
331
332 if (!input.advance()) { // move past the newline.
333 return false;
334 }
335 ++current_line;
336 }
337 return true;
338 }
339
340private:
341 bool my_passed_banner = false;
342
/**
 * Outcome of trying to match a fixed string in the input.
 * Callers in this file inspect `newline` only after `found` has been checked.
 */
struct ExpectedMatch {
    ExpectedMatch() : found(false), newline(false), remaining(false) {}
    ExpectedMatch(bool found, bool newline, bool remaining) : found(found), newline(newline), remaining(remaining) {}

    bool found;     // whether the expected string was matched.
    bool newline;   // whether a newline was reached (and stepped past) right after the match.
    bool remaining; // whether the input still has bytes to read.
};
350
351 ExpectedMatch advance_past_expected_string() {
352 if (!(my_input->advance())) { // move off the last character.
353 return ExpectedMatch(true, false, false);
354 }
355
356 char next = my_input->get();
357 if (next == ' ' || next == '\t' || next == '\r') {
358 if (!advance_and_chomp(*my_input)) { // gobble up all of the remaining horizontal space.
359 return ExpectedMatch(true, false, false);
360 }
361 if (my_input->get() == '\n') {
362 bool remaining = my_input->advance(); // move past the newline for consistency with other functions.
363 return ExpectedMatch(true, true, remaining); // move past the newline for consistency with other functions.
364 }
365 return ExpectedMatch(true, false, true);
366
367 } else if (next == '\n') {
368 bool remaining = my_input->advance(); // move past the newline for consistency with other functions.
369 return ExpectedMatch(true, true, remaining);
370 }
371
372 // If the next character is not a space or whitespace, it's not a match.
373 return ExpectedMatch(false, true, true);
374 }
375
376 ExpectedMatch is_expected_string(const char* ptr, std::size_t len, std::size_t start) {
377 // It is assumed that the first 'start' characters of 'ptr' where
378 // already checked and matched before entering this function, and that
379 // 'my_input' is currently positioned at the start-th character, i.e.,
380 // 'ptr[start-1]' (and thus requires an advance() call before we can
381 // compare against 'ptr[start]').
382 for (std::size_t i = start; i < len; ++i) {
383 if (!my_input->advance()) {
384 return ExpectedMatch(false, false, false);
385 }
386 if (my_input->get() != ptr[i]) {
387 return ExpectedMatch(false, false, true);
388 }
389 }
390 return advance_past_expected_string();
391 }
392
393 ExpectedMatch is_expected_string(const char* ptr, std::size_t len) {
394 // Using a default start of 1, assuming that we've already compared
395 // the first character before entering this function.
396 return is_expected_string(ptr, len, 1);
397 }
398
399 bool parse_banner_object() {
400 ExpectedMatch res;
401
402 char x = my_input->get();
403 if (x == 'm') {
404 res = is_expected_string("matrix", 6);
405 my_details.object = Object::MATRIX;
406 } else if (x == 'v') {
407 res = is_expected_string("vector", 6);
408 my_details.object = Object::VECTOR;
409 }
410
411 if (!res.found) {
412 throw std::runtime_error("first banner field should be one of 'matrix' or 'vector'");
413 }
414 if (!res.remaining) {
415 throw std::runtime_error("end of file reached after the first banner field");
416 }
417
418 return res.newline;
419 }
420
421 bool parse_banner_format() {
422 ExpectedMatch res;
423
424 char x = my_input->get();
425 if (x == 'c') {
426 res = is_expected_string("coordinate", 10);
427 my_details.format = Format::COORDINATE;
428 } else if (x == 'a') {
429 res = is_expected_string("array", 5);
430 my_details.format = Format::ARRAY;
431 }
432
433 if (!res.found) {
434 throw std::runtime_error("second banner field should be one of 'coordinate' or 'array'");
435 }
436 if (!res.remaining) {
437 throw std::runtime_error("end of file reached after the second banner field");
438 }
439
440 return res.newline;
441 }
442
443 bool parse_banner_field() {
444 ExpectedMatch res;
445
446 char x = my_input->get();
447 if (x == 'i') {
448 res = is_expected_string("integer", 7);
449 my_details.field = Field::INTEGER;
450 } else if (x == 'd') {
451 res = is_expected_string("double", 6);
452 my_details.field = Field::DOUBLE;
453 } else if (x == 'c') {
454 res = is_expected_string("complex", 7);
455 my_details.field = Field::COMPLEX;
456 } else if (x == 'p') {
457 res = is_expected_string("pattern", 7);
458 my_details.field = Field::PATTERN;
459 } else if (x == 'r') {
460 res = is_expected_string("real", 4);
461 my_details.field = Field::REAL;
462 }
463
464 if (!res.found) {
465 throw std::runtime_error("third banner field should be one of 'real', 'integer', 'double', 'complex' or 'pattern'");
466 }
467 if (!res.remaining) {
468 throw std::runtime_error("end of file reached after the third banner field");
469 }
470
471 return res.newline;
472 }
473
474 bool parse_banner_symmetry() {
475 ExpectedMatch res;
476
477 char x = my_input->get();
478 if (x == 'g') {
479 res = is_expected_string("general", 7);
480 my_details.symmetry = Symmetry::GENERAL;
481 } else if (x == 'h') {
482 res = is_expected_string("hermitian", 9);
483 my_details.symmetry = Symmetry::HERMITIAN;
484 } else if (x == 's') {
485 if (my_input->advance()) {
486 char x = my_input->get();
487 if (x == 'k') {
488 res = is_expected_string("skew-symmetric", 14, 2);
489 my_details.symmetry = Symmetry::SKEW_SYMMETRIC;
490 } else {
491 res = is_expected_string("symmetric", 9, 2);
492 my_details.symmetry = Symmetry::SYMMETRIC;
493 }
494 }
495 }
496
497 if (!res.found) {
498 throw std::runtime_error("fourth banner field should be one of 'general', 'hermitian', 'skew-symmetric' or 'symmetric'");
499 }
500 if (!res.remaining) {
501 throw std::runtime_error("end of file reached after the fourth banner field");
502 }
503
504 return res.newline;
505 }
506
507 void scan_banner() {
508 if (my_passed_banner) {
509 throw std::runtime_error("banner has already been scanned");
510 }
511 if (!(my_input->valid())) {
512 throw std::runtime_error("failed to find banner line before end of file");
513 }
514 if (my_input->get() != '%') {
515 throw std::runtime_error("first line of the file should be the banner");
516 }
517
518 auto found_banner = is_expected_string("%%MatrixMarket", 14);
519 if (!found_banner.remaining) {
520 throw std::runtime_error("end of file reached before matching the banner");
521 }
522 if (!found_banner.found) {
523 throw std::runtime_error("first line of the file should be the banner");
524 }
525 if (found_banner.newline) {
526 throw std::runtime_error("end of line reached before matching the banner");
527 }
528
529 if (parse_banner_object()) {
530 throw std::runtime_error("end of line reached after the first banner field");
531 }
532 if (parse_banner_format()) {
533 throw std::runtime_error("end of line reached after the second banner field");
534 }
535
536 bool eol = false;
537 if (my_details.object == Object::MATRIX) {
538 if (parse_banner_field()) {
539 throw std::runtime_error("end of line reached after the third banner field");
540 }
541 eol = parse_banner_symmetry();
542 } else {
543 // The NIST spec doesn't say anything about symmetry for vector,
544 // and it doesn't really make sense anyway. We'll just set it to
545 // general and hope for the best.
546 my_details.symmetry = Symmetry::GENERAL;
547
548 // No need to throw on newline because this might be the last field AFAICT.
549 eol = parse_banner_field();
550 }
551
552 my_passed_banner = true;
553
554 // Ignoring all other fields until the newline. We can use a do/while
555 // to skip the initial comparison because we know that the current
556 // character cannot be a newline if eol = false.
557 if (!eol) {
558 do {
559 if (!(my_input->advance())) {
560 throw std::runtime_error("end of file reached before the end of the banner line");
561 }
562 } while (my_input->get() != '\n');
563 my_input->advance(); // move past the newline.
564 }
565
566 ++my_current_line;
567 return;
568 }
569
570public:
577 const MatrixDetails& get_banner() const {
578 if (!my_passed_banner) {
579 throw std::runtime_error("banner has not yet been scanned");
580 }
581 return my_details;
582 }
583
584private:
585 // Only calls with 'last_ = true' need to know if there are any remaining bytes after the newline.
586 // This is because all non-last calls with no remaining bytes must have thrown.
587 struct NotLastSizeInfo {
588 Index index = 0;
589 };
590
591 struct LastSizeInfo {
592 Index index = 0;
593 bool remaining = false;
594 };
595
596 template<bool last_>
597 using SizeInfo = typename std::conditional<last_, LastSizeInfo, NotLastSizeInfo>::type;
598
/**
 * Scan one non-negative decimal integer field from the current line.
 *
 * @tparam last_ Whether this is the last field on the line. If true, the field must be followed by
 * (optional blanks and) a newline or the end of input, and the returned struct reports whether input remains.
 * If false, the field must be followed by at least one blank and then further content.
 * @tparam Input2_ Type providing `get()` and `advance()`.
 *
 * @param size Only selects the word used in error messages: "size" if true, "index" if false.
 * @param input Positioned on the first character of the field, i.e., with leading blanks already skipped.
 * @param overall_line_count Zero-based line number, reported as one-based in error messages.
 *
 * @return The parsed value, plus the `remaining` flag when `last_` is true.
 * @throws std::runtime_error on overflow of `Index`, an empty field, an unexpected character,
 * a newline or end of input that arrives too early (`last_ = false`), or trailing content (`last_ = true`).
 */
599 template<bool last_, class Input2_>
600 static SizeInfo<last_> scan_integer_field(bool size, Input2_& input, Index overall_line_count) {
601 SizeInfo<last_> output;
602 bool found = false;
603
604 auto what = [&]() -> std::string {
605 if (size) {
606 return "size";
607 } else {
608 return "index";
609 }
610 };
611
 // Thresholds for detecting overflow before the multiply-and-add is performed.
612 constexpr Index max_limit = std::numeric_limits<Index>::max();
613 constexpr Index max_limit_before_mult = max_limit / 10;
614 constexpr Index max_limit_mod = max_limit % 10;
615
616 while (1) {
617 char x = input.get();
618 switch(x) {
619 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
620 {
621 Index delta = x - '0';
622 // Structuring the conditionals so that it's most likely to short-circuit after only testing the first one.
623 if (output.index >= max_limit_before_mult && !(output.index == max_limit_before_mult && delta <= max_limit_mod)) {
624 throw std::runtime_error("integer overflow in " + what() + " field on line " + std::to_string(overall_line_count + 1));
625 }
626 output.index *= 10;
627 output.index += delta;
628 }
629 found = true;
630 break;
631 case '\n':
632 // This check only needs to be put here, as all blanks should be chomped before calling
633 // this function; so we must start on a non-blank character. This starting character is either:
634 // - a digit, in which case found = true and this check is unnecessary.
635 // - a non-newline non-digit, in case we throw.
636 // - a newline, in which case we arrive here.
637 if (!found) {
638 throw std::runtime_error("empty " + what() + " field on line " + std::to_string(overall_line_count + 1));
639 }
640 if constexpr(last_) {
641 output.remaining = input.advance(); // advance past the newline.
642 return output;
643 }
 // Only reachable when last_ is false: a non-last field may not be terminated by a newline.
644 throw std::runtime_error("unexpected newline when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
645 case ' ': case '\t': case '\r':
646 if (!advance_and_chomp(input)) { // skipping the current and subsequent blanks.
 // Input ended on trailing blanks; 'remaining' keeps its default of false for the last field.
647 if constexpr(last_) {
648 return output;
649 } else {
650 throw std::runtime_error("unexpected end of file when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
651 }
652 }
653 if constexpr(last_) {
654 if (input.get() != '\n') {
655 throw std::runtime_error("expected newline after the last " + what() + " field on line " + std::to_string(overall_line_count + 1));
656 }
657 output.remaining = input.advance(); // advance past the newline.
658 }
659 return output;
660 default:
661 throw std::runtime_error("unexpected character when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
662 }
663
 // Only the digit case breaks out of the switch and reaches this point.
664 if (!(input.advance())) { // moving past the current digit.
665 if constexpr(last_) {
666 break;
667 } else {
668 throw std::runtime_error("unexpected end of file when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
669 }
670 }
671 }
672
673 return output;
674 }
675
676 template<bool last_, class Input2_>
677 static SizeInfo<last_> scan_size_field(Input2_& input, Index overall_line_count) {
678 return scan_integer_field<last_>(true, input, overall_line_count);
679 }
680
681 template<bool last_, class Input2_>
682 static SizeInfo<last_> scan_index_field(Input2_& input, Index overall_line_count) {
683 return scan_integer_field<last_>(false, input, overall_line_count);
684 }
685
686private:
687 bool my_passed_size = false;
688 Index my_nrows = 0, my_ncols = 0, my_nlines = 0;
689
690 void scan_size() {
691 if (!(my_input->valid())) {
692 throw std::runtime_error("failed to find size line before end of file");
693 }
694
695 // Handling stray comments, empty lines, and leading whitespace.
696 if (!skip_lines(*my_input, my_current_line)) {
697 throw std::runtime_error("failed to find size line before end of file");
698 }
699 if (!chomp(*my_input)) {
700 throw std::runtime_error("expected at least one size field on line " + std::to_string(my_current_line + 1));
701 }
702
703 if (my_details.object == Object::MATRIX) {
704 if (my_details.format == Format::COORDINATE) {
705 auto first_field = scan_size_field<false>(*my_input, my_current_line);
706 my_nrows = first_field.index;
707
708 auto second_field = scan_size_field<false>(*my_input, my_current_line);
709 my_ncols = second_field.index;
710
711 auto third_field = scan_size_field<true>(*my_input, my_current_line);
712 my_nlines = third_field.index;
713
714 } else { // i.e., my_details.format == Format::ARRAY
715 auto first_field = scan_size_field<false>(*my_input, my_current_line);
716 my_nrows = first_field.index;
717
718 auto second_field = scan_size_field<true>(*my_input, my_current_line);
719 my_ncols = second_field.index;
720 my_nlines = my_nrows * my_ncols;
721 }
722
723 } else {
724 if (my_details.format == Format::COORDINATE) {
725 auto first_field = scan_size_field<false>(*my_input, my_current_line);
726 my_nrows = first_field.index;
727
728 auto second_field = scan_size_field<true>(*my_input, my_current_line);
729 my_nlines = second_field.index;
730
731 } else { // i.e., my_details.format == Format::ARRAY
732 auto first_field = scan_size_field<true>(*my_input, my_current_line);
733 my_nlines = first_field.index;
734 my_nrows = my_nlines;
735 }
736 my_ncols = 1;
737 }
738
739 ++my_current_line;
740 my_passed_size = true;
741 }
742
743public:
751 Index get_nrows() const {
752 if (!my_passed_size) {
753 throw std::runtime_error("size line has not yet been scanned");
754 }
755 return my_nrows;
756 }
757
765 Index get_ncols() const {
766 if (!my_passed_size) {
767 throw std::runtime_error("size line has not yet been scanned");
768 }
769 return my_ncols;
770 }
771
780 if (!my_passed_size) {
781 throw std::runtime_error("size line has not yet been scanned");
782 }
783 return my_nlines;
784 }
785
786public:
792 scan_banner();
793 scan_size();
794 return;
795 }
796
797private:
/**
 * Result of parsing the value field of a data line.
 *
 * @tparam Type_ Type of the parsed value.
 */
template<typename Type_>
struct ParseInfo {
    ParseInfo() = default;
    ParseInfo(Type_ parsed, bool more) : value(parsed), remaining(more) {}

    Type_ value;    // the parsed value.
    bool remaining; // whether the input has any bytes left after this line.
};
805
806 template<typename Workspace_>
807 bool configure_parallel_workspace(Workspace_& work) {
808 bool available = fill_to_next_newline(*my_input, work.buffer, my_block_size);
809 work.contents.clear();
810 work.overall_line = my_current_line;
811 my_current_line += count_newlines(work.buffer);
812 return available;
813 }
814
815 void check_num_lines_loop(Index data_line_count) const {
816 if (data_line_count >= my_nlines) {
817 throw std::runtime_error("more lines present than specified in the header (" + std::to_string(data_line_count) + " versus " + std::to_string(my_nlines) + ")");
818 }
819 }
820
821 void check_num_lines_final(bool finished, Index data_line_count) const {
822 if (finished) {
823 if (data_line_count != my_nlines) {
824 // Must be fewer, otherwise we would have triggered the error in check_num_lines_loop() during iteration.
825 throw std::runtime_error("fewer lines present than specified in the header (" + std::to_string(data_line_count) + " versus " + std::to_string(my_nlines) + ")");
826 }
827 }
828 }
829
830private:
831 void check_matrix_coordinate_line(Index currow, Index curcol, Index overall_line_count) const {
832 if (!currow) {
833 throw std::runtime_error("row index must be positive on line " + std::to_string(overall_line_count + 1));
834 }
835 if (currow > my_nrows) {
836 throw std::runtime_error("row index out of range on line " + std::to_string(overall_line_count + 1));
837 }
838 if (!curcol) {
839 throw std::runtime_error("column index must be positive on line " + std::to_string(overall_line_count + 1));
840 }
841 if (curcol > my_ncols) {
842 throw std::runtime_error("column index out of range on line " + std::to_string(overall_line_count + 1));
843 }
844 }
845
846 template<typename Type_, class Input2_, typename FieldParser_, class WrappedStore_>
847 bool scan_matrix_coordinate_non_pattern_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
848 bool valid = input.valid();
849 while (valid) {
850 // Handling stray comments, empty lines, and leading spaces.
851 if (!skip_lines(input, overall_line_count)) {
852 break;
853 }
854 if (!chomp(input)) {
855 throw std::runtime_error("expected at least three fields for a coordinate matrix on line " + std::to_string(overall_line_count + 1));
856 }
857
858 auto first_field = scan_index_field<false>(input, overall_line_count);
859 auto second_field = scan_index_field<false>(input, overall_line_count);
860 check_matrix_coordinate_line(first_field.index, second_field.index, overall_line_count);
861
862 // 'fparser' should leave 'input' at the start of the next line, if any exists.
863 ParseInfo<Type_> res = fparser(input, overall_line_count);
864 if (!wstore(first_field.index, second_field.index, res.value)) {
865 return false;
866 }
867 ++overall_line_count;
868 valid = res.remaining;
869 }
870
871 return true;
872 }
873
/**
 * Scan all data lines of a coordinate matrix with values, serially or with a thread pool.
 *
 * @tparam Type_ Type of the value on each line.
 * @tparam FieldParser_ Default-constructible parser for the value field.
 * @param store Called as `store(row, column, value)` for every data line, in file order; returning false stops the scan.
 *
 * @return false if `store` requested a stop, true if the input was scanned to its end.
 * @throws std::runtime_error if more or (on completion) fewer data lines are present than `my_nlines`,
 * in addition to anything thrown while scanning a line.
 */
874 template<typename Type_, class FieldParser_, class Store_>
875 bool scan_matrix_coordinate_non_pattern(Store_ store) {
876 bool finished = false;
877 Index current_data_line = 0;
878
879 if (my_nthreads == 1) {
 // Serial path: parse directly from 'my_input' and store each line as it is parsed.
880 FieldParser_ fparser;
881 finished = scan_matrix_coordinate_non_pattern_base<Type_>(
882 *my_input,
883 my_current_line,
884 fparser,
885 [&](Index r, Index c, Type_ value) -> bool {
886 check_num_lines_loop(current_data_line);
887 ++current_data_line;
888 return store(r, c, value);
889 }
890 );
891
892 } else {
 // Parallel path: each worker parses a block of whole lines into 'contents',
 // and the calling thread passes the parsed triplets to 'store' during the merge.
893 struct Workspace {
894 std::vector<char> buffer;
895 FieldParser_ fparser;
896 std::vector<std::tuple<Index, Index, Type_> > contents;
897 Index overall_line;
898 };
899
900 ThreadPool<Workspace> tp(
901 [&](Workspace& work) -> bool {
902 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
 // NOTE(review): 'pb' is not declared in the lines visible here (the listing jumps from 902 to 904);
 // presumably it is a per-byte wrapper around 'reader' - confirm against the full source.
904 return scan_matrix_coordinate_non_pattern_base<Type_>(
905 pb,
906 work.overall_line,
907 work.fparser,
908 [&](Index r, Index c, Type_ value) -> bool {
909 work.contents.emplace_back(r, c, value);
910 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
911 }
912 );
913 },
914 my_nthreads
915 );
916
917 finished = tp.run(
918 [&](Workspace& work) -> bool {
919 return configure_parallel_workspace(work);
920 },
921 [&](Workspace& work) -> bool {
922 for (const auto& con : work.contents) {
923 check_num_lines_loop(current_data_line); // defer check here for the correctly sync'd value of current_data_line.
924 if (!store(std::get<0>(con), std::get<1>(con), std::get<2>(con))) {
925 return false;
926 }
927 ++current_data_line;
928 }
929 return true;
930 }
931 );
932 }
933
 // Only complains about too few lines if the scan ran to completion.
934 check_num_lines_final(finished, current_data_line);
935 return finished;
936 }
937
938private:
939 template<class Input2_, class WrappedStore_>
940 bool scan_matrix_coordinate_pattern_base(Input2_& input, Index& overall_line_count, WrappedStore_ wstore) const {
941 bool valid = input.valid();
942 while (valid) {
943 // Handling stray comments, empty lines, and leading spaces.
944 if (!skip_lines(input, overall_line_count)) {
945 break;
946 }
947 if (!chomp(input)) {
948 throw std::runtime_error("expected two fields for a pattern matrix on line " + std::to_string(overall_line_count + 1));
949 }
950
951 auto first_field = scan_index_field<false>(input, overall_line_count);
952 auto second_field = scan_index_field<true>(input, overall_line_count);
953 check_matrix_coordinate_line(first_field.index, second_field.index, overall_line_count);
954
955 if (!wstore(first_field.index, second_field.index)) {
956 return false;
957 }
958 ++overall_line_count;
959 valid = second_field.remaining;
960 }
961
962 return true;
963 }
964
/**
 * Scan all data lines of a coordinate pattern matrix, serially or with a thread pool.
 *
 * @param store Called as `store(row, column)` for every data line, in file order; returning false stops the scan.
 *
 * @return false if `store` requested a stop, true if the input was scanned to its end.
 * @throws std::runtime_error if more or (on completion) fewer data lines are present than `my_nlines`,
 * in addition to anything thrown while scanning a line.
 */
965 template<class Store_>
966 bool scan_matrix_coordinate_pattern(Store_ store) {
967 bool finished = false;
968 Index current_data_line = 0;
969
970 if (my_nthreads == 1) {
 // Serial path: parse directly from 'my_input' and store each line as it is parsed.
971 finished = scan_matrix_coordinate_pattern_base(
972 *my_input,
973 my_current_line,
974 [&](Index r, Index c) -> bool {
975 check_num_lines_loop(current_data_line);
976 ++current_data_line;
977 return store(r, c);
978 }
979 );
980
981 } else {
 // Parallel path: each worker parses a block of whole lines into 'contents',
 // and the calling thread passes the parsed pairs to 'store' during the merge.
982 struct Workspace {
983 std::vector<char> buffer;
984 std::vector<std::tuple<Index, Index> > contents;
985 Index overall_line;
986 };
987
988 ThreadPool<Workspace> tp(
989 [&](Workspace& work) -> bool {
990 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
 // NOTE(review): 'pb' is not declared in the lines visible here (the listing jumps from 990 to 992);
 // presumably it is a per-byte wrapper around 'reader' - confirm against the full source.
992 return scan_matrix_coordinate_pattern_base(
993 pb,
994 work.overall_line,
995 [&](Index r, Index c) -> bool {
996 work.contents.emplace_back(r, c);
997 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
998 }
999 );
1000 },
1001 my_nthreads
1002 );
1003
1004 finished = tp.run(
1005 [&](Workspace& work) -> bool {
1006 return configure_parallel_workspace(work);
1007 },
1008 [&](Workspace& work) -> bool {
1009 for (const auto& con : work.contents) {
 // Checked here, on the merging thread, where 'current_data_line' is up to date.
1010 check_num_lines_loop(current_data_line);
1011 if (!store(std::get<0>(con), std::get<1>(con))) {
1012 return false;
1013 }
1014 ++current_data_line;
1015 }
1016 return true;
1017 }
1018 );
1019 }
1020
 // Only complains about too few lines if the scan ran to completion.
1021 check_num_lines_final(finished, current_data_line);
1022 return finished;
1023 }
1024
1025private:
1026 void check_vector_coordinate_line(Index currow, Index overall_line_count) const {
1027 if (!currow) {
1028 throw std::runtime_error("row index must be positive on line " + std::to_string(overall_line_count + 1));
1029 }
1030 if (currow > my_nrows) {
1031 throw std::runtime_error("row index out of range on line " + std::to_string(overall_line_count + 1));
1032 }
1033 }
1034
1035 template<typename Type_, class Input2_, class FieldParser_, class WrappedStore_>
1036 bool scan_vector_coordinate_non_pattern_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
1037 bool valid = input.valid();
1038 while (valid) {
1039 // handling stray comments, empty lines, and leading spaces.
1040 if (!skip_lines(input, overall_line_count)) {
1041 break;
1042 }
1043 if (!chomp(input)) {
1044 throw std::runtime_error("expected at least two fields for a coordinate vector on line " + std::to_string(overall_line_count + 1));
1045 }
1046
1047 auto first_field = scan_index_field<false>(input, overall_line_count);
1048 check_vector_coordinate_line(first_field.index, overall_line_count);
1049
1050 // 'fparser' should leave 'input' at the start of the next line, if any exists.
1051 ParseInfo<Type_> res = fparser(input, overall_line_count);
1052 if (!wstore(first_field.index, res.value)) {
1053 return false;
1054 }
1055 ++overall_line_count;
1056 valid = res.remaining;
1057 }
1058
1059 return true;
1060 }
1061
/**
 * Scan all data lines of a coordinate vector with values, serially or with a thread pool.
 *
 * @tparam Type_ Type of the value on each line.
 * @tparam FieldParser_ Default-constructible parser for the value field.
 * @param store Called as `store(index, 1, value)` for every data line, in file order
 * (the column is always passed as 1); returning false stops the scan.
 *
 * @return false if `store` requested a stop, true if the input was scanned to its end.
 * @throws std::runtime_error if more or (on completion) fewer data lines are present than `my_nlines`,
 * in addition to anything thrown while scanning a line.
 */
1062 template<typename Type_, class FieldParser_, class Store_>
1063 bool scan_vector_coordinate_non_pattern(Store_ store) {
1064 bool finished = false;
1065 Index current_data_line = 0;
1066
1067 if (my_nthreads == 1) {
 // Serial path: parse directly from 'my_input' and store each line as it is parsed.
1068 FieldParser_ fparser;
1069 finished = scan_vector_coordinate_non_pattern_base<Type_>(
1070 *my_input,
1071 my_current_line,
1072 fparser,
1073 [&](Index r, Type_ value) -> bool {
1074 check_num_lines_loop(current_data_line);
1075 ++current_data_line;
1076 return store(r, 1, value);
1077 }
1078 );
1079
1080 } else {
 // Parallel path: each worker parses a block of whole lines into 'contents',
 // and the calling thread passes the parsed pairs to 'store' during the merge.
1081 struct Workspace {
1082 std::vector<char> buffer;
1083 FieldParser_ fparser;
1084 std::vector<std::tuple<Index, Type_> > contents;
1085 Index overall_line;
1086 };
1087
1088 ThreadPool<Workspace> tp(
1089 [&](Workspace& work) -> bool {
1090 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
 // NOTE(review): 'pb' is not declared in the lines visible here (the listing jumps from 1090 to 1092);
 // presumably it is a per-byte wrapper around 'reader' - confirm against the full source.
1092 return scan_vector_coordinate_non_pattern_base<Type_>(
1093 pb,
1094 work.overall_line,
1095 work.fparser,
1096 [&](Index r, Type_ value) -> bool {
1097 work.contents.emplace_back(r, value);
1098 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1099 }
1100 );
1101 },
1102 my_nthreads
1103 );
1104
1105 finished = tp.run(
1106 [&](Workspace& work) -> bool {
1107 return configure_parallel_workspace(work);
1108 },
1109 [&](Workspace& work) -> bool {
1110 for (const auto& con : work.contents) {
 // Checked here, on the merging thread, where 'current_data_line' is up to date.
1111 check_num_lines_loop(current_data_line);
1112 if (!store(std::get<0>(con), 1, std::get<1>(con))) {
1113 return false;
1114 }
1115 ++current_data_line;
1116 }
1117 return true;
1118 }
1119 );
1120 }
1121
 // Only complains about too few lines if the scan ran to completion.
1122 check_num_lines_final(finished, current_data_line);
1123 return finished;
1124 }
1125
1126private:
1127 template<class Input2_, class WrappedStore_>
1128 bool scan_vector_coordinate_pattern_base(Input2_& input, Index& overall_line_count, WrappedStore_ wstore) const {
1129 bool valid = input.valid();
1130 while (valid) {
1131 // Handling stray comments, empty lines, and leading spaces.
1132 if (!skip_lines(input, overall_line_count)) {
1133 break;
1134 }
1135 if (!chomp(input)) {
1136 throw std::runtime_error("expected one field for a coordinate vector on line " + std::to_string(overall_line_count + 1));
1137 }
1138
1139 auto first_field = scan_index_field<true>(input, overall_line_count);
1140 check_vector_coordinate_line(first_field.index, overall_line_count);
1141
1142 if (!wstore(first_field.index)) {
1143 return false;
1144 }
1145 ++overall_line_count;
1146 valid = first_field.remaining;
1147 }
1148
1149 return true;
1150 }
1151
// Scans all data lines of a pattern coordinate vector and forwards each entry
// to 'store' as store(row, 1), i.e. the column is always reported as 1.
// Returns the 'finished' flag produced by the serial scan or by the thread
// pool's run() call; that flag is also passed to check_num_lines_final().
1152 template<class Store_>
1153 bool scan_vector_coordinate_pattern(Store_ store) {
1154 bool finished = false;
// Number of data lines handed to 'store' so far; checked against the
// expected count via check_num_lines_loop() / check_num_lines_final().
1155 Index current_data_line = 0;
1156
1157 if (my_nthreads == 1) {
// Serial path: parse straight from the parser's own input and line counter.
1158 finished = scan_vector_coordinate_pattern_base(
1159 *my_input,
1160 my_current_line,
1161 [&](Index r) -> bool {
1162 check_num_lines_loop(current_data_line);
1163 ++current_data_line;
1164 return store(r, 1);
1165 }
1166 );
1167
1168 } else {
// Parallel path: each job parses the bytes in 'buffer' into 'contents',
// starting from line number 'overall_line'.
1169 struct Workspace {
1170 std::vector<char> buffer;
1171 std::vector<Index> contents;
1172 Index overall_line;
1173 };
1174
1175 ThreadPool<Workspace> tp(
1176 [&](Workspace& work) -> bool {
1177 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
// NOTE(review): 'pb' is not declared in the visible lines (the listing skips from
// 1177 to 1179); presumably a per-byte wrapper around 'reader' - confirm against the real file.
1179 return scan_vector_coordinate_pattern_base(
1180 pb,
1181 work.overall_line,
1182 [&](Index r) -> bool {
1183 work.contents.emplace_back(r);
1184 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1185 }
1186 );
1187 },
1188 my_nthreads
1189 );
1190
// First callback prepares a workspace via configure_parallel_workspace();
// second callback replays the parsed indices into 'store' in order, where
// early termination is allowed (unlike in the parsing job above).
1191 finished = tp.run(
1192 [&](Workspace& work) -> bool {
1193 return configure_parallel_workspace(work);
1194 },
1195 [&](Workspace& work) -> bool {
1196 for (const auto& r : work.contents) {
1197 check_num_lines_loop(current_data_line);
1198 if (!store(r, 1)) {
1199 return false;
1200 }
1201 ++current_data_line;
1202 }
1203 return true;
1204 }
1205 );
1206 }
1207
1208 check_num_lines_final(finished, current_data_line);
1209 return finished;
1210 }
1211
1212private:
1213 template<typename Type_, class Input2_, class FieldParser_, class WrappedStore_>
1214 bool scan_matrix_array_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
1215 bool valid = input.valid();
1216 while (valid) {
1217 // Handling stray comments, empty lines, and leading spaces.
1218 if (!skip_lines(input, overall_line_count)) {
1219 break;
1220 }
1221 if (!chomp(input)) {
1222 throw std::runtime_error("expected at least one field for an array matrix on line " + std::to_string(overall_line_count + 1));
1223 }
1224
1225 // 'fparser' should leave 'input' at the start of the next line, if any exists.
1226 ParseInfo<Type_> res = fparser(input, overall_line_count);
1227 if (!wstore(res.value)) {
1228 return false;
1229 }
1230 ++overall_line_count;
1231 valid = res.remaining;
1232 }
1233
1234 return true;
1235 }
1236
// Scans all data lines of an array-format matrix and forwards each value to
// 'store' as store(row, column, value), with 1-based coordinates generated
// here rather than read from the file.
// Returns the 'finished' flag produced by the serial scan or by the thread
// pool's run() call; that flag is also passed to check_num_lines_final().
1237 template<typename Type_, class FieldParser_, class Store_>
1238 bool scan_matrix_array(Store_ store) {
1239 bool finished = false;
// Number of data lines handed to 'store' so far.
1240 Index current_data_line = 0;
1241
// Coordinates of the next value. 'increment' walks down the rows first and
// moves to the next column once the row exceeds my_nrows.
1242 Index currow = 1, curcol = 1;
1243 auto increment = [&]() {
1244 ++currow;
1245 if (currow > my_nrows) {
1246 ++curcol;
1247 currow = 1;
1248 }
1249 };
1250
1251 if (my_nthreads == 1) {
// Serial path: parse straight from the parser's own input and line counter.
1252 FieldParser_ fparser;
1253 finished = scan_matrix_array_base<Type_>(
1254 *my_input,
1255 my_current_line,
1256 fparser,
1257 [&](Type_ value) -> bool {
1258 check_num_lines_loop(current_data_line);
1259 if (!store(currow, curcol, value)) {
1260 return false;
1261 }
1262 ++current_data_line;
1263 increment();
1264 return true;
1265 }
1266 );
1267
1268 } else {
// Parallel path: each job parses the bytes in 'buffer' into 'contents' with
// its own field parser, starting from line number 'overall_line'.
1269 struct Workspace {
1270 std::vector<char> buffer;
1271 FieldParser_ fparser;
1272 std::vector<Type_> contents;
1273 Index overall_line;
1274 };
1275
1276 ThreadPool<Workspace> tp(
1277 [&](Workspace& work) -> bool {
1278 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
// NOTE(review): 'pb' is not declared in the visible lines (the listing skips from
// 1278 to 1280); presumably a per-byte wrapper around 'reader' - confirm against the real file.
1280 return scan_matrix_array_base<Type_>(
1281 pb,
1282 work.overall_line,
1283 work.fparser,
1284 [&](Type_ value) -> bool {
1285 work.contents.emplace_back(value);
1286 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1287 }
1288 );
1289 },
1290 my_nthreads
1291 );
1292
// First callback prepares a workspace via configure_parallel_workspace();
// second callback replays the parsed values into 'store' in order, assigning
// coordinates with 'increment' exactly as the serial path does.
1293 finished = tp.run(
1294 [&](Workspace& work) -> bool {
1295 return configure_parallel_workspace(work);
1296 },
1297 [&](Workspace& work) -> bool {
1298 for (const auto& val : work.contents) {
1299 check_num_lines_loop(current_data_line);
1300 if (!store(currow, curcol, val)) {
1301 return false;
1302 }
1303 ++current_data_line;
1304 increment();
1305 }
1306 return true;
1307 }
1308 );
1309 }
1310
1311 check_num_lines_final(finished, current_data_line);
1312 return finished;
1313 }
1314
1315private:
1316 template<typename Type_, class Input2_, class FieldParser_, class WrappedStore_>
1317 bool scan_vector_array_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
1318 bool valid = input.valid();
1319 while (valid) {
1320 // Handling stray comments, empty lines, and leading spaces.
1321 if (!skip_lines(input, overall_line_count)) {
1322 break;
1323 }
1324 if (!chomp(input)) {
1325 throw std::runtime_error("expected at least one field for an array vector on line " + std::to_string(overall_line_count + 1));
1326 }
1327
1328 // 'fparser' should leave 'input' at the start of the next line, if any exists.
1329 ParseInfo<Type_> res = fparser(input, overall_line_count);
1330 if (!wstore(res.value)) {
1331 return false;
1332 }
1333 ++overall_line_count;
1334 valid = res.remaining;
1335 }
1336
1337 return true;
1338 }
1339
// Scans all data lines of an array-format vector and forwards each value to
// 'store' as store(row, 1, value), where the 1-based row is the running count
// of data lines.
// Returns the 'finished' flag produced by the serial scan or by the thread
// pool's run() call; that flag is also passed to check_num_lines_final().
1340 template<typename Type_, class FieldParser_, class Store_>
1341 bool scan_vector_array(Store_ store) {
1342 bool finished = false;
// Number of data lines seen so far; incremented before each store() call so
// that it doubles as the 1-based row index.
1343 Index current_data_line = 0;
1344 if (my_nthreads == 1) {
// Serial path: parse straight from the parser's own input and line counter.
1345 FieldParser_ fparser;
1346 finished = scan_vector_array_base<Type_>(
1347 *my_input,
1348 my_current_line,
1349 fparser,
1350 [&](Type_ value) -> bool {
1351 check_num_lines_loop(current_data_line);
1352 ++current_data_line;
1353 return store(current_data_line, 1, value);
1354 }
1355 );
1356
1357 } else {
// Parallel path: each job parses the bytes in 'buffer' into 'contents' with
// its own field parser, starting from line number 'overall_line'.
1358 struct Workspace {
1359 std::vector<char> buffer;
1360 FieldParser_ fparser;
1361 std::vector<Type_> contents;
1362 Index overall_line;
1363 };
1364
1365 ThreadPool<Workspace> tp(
1366 [&](Workspace& work) -> bool {
1367 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
// NOTE(review): 'pb' is not declared in the visible lines (the listing skips from
// 1367 to 1369); presumably a per-byte wrapper around 'reader' - confirm against the real file.
1369 return scan_vector_array_base<Type_>(
1370 pb,
1371 work.overall_line,
1372 work.fparser,
1373 [&](Type_ value) -> bool {
1374 work.contents.emplace_back(value);
1375 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1376 }
1377 );
1378 },
1379 my_nthreads
1380 );
1381
// First callback prepares a workspace via configure_parallel_workspace();
// second callback replays the parsed values into 'store' in order.
1382 finished = tp.run(
1383 [&](Workspace& work) -> bool {
1384 return configure_parallel_workspace(work);
1385 },
1386 [&](Workspace& work) -> bool {
1387 for (const auto& val : work.contents) {
1388 check_num_lines_loop(current_data_line);
1389 ++current_data_line;
1390 if (!store(current_data_line, 1, val)) {
1391 return false;
1392 }
1393 }
1394 return true;
1395 }
1396 );
1397 }
1398
1399 check_num_lines_final(finished, current_data_line);
1400 return finished;
1401 }
1402
1403private:
1404 void check_preamble() const {
1405 if (!my_passed_banner || !my_passed_size) {
1406 throw std::runtime_error("banner or size lines have not yet been parsed");
1407 }
1408 }
1409
1410 template<typename Type_>
1411 class IntegerFieldParser {
1412 public:
1413 template<class Input2_>
1414 ParseInfo<Type_> operator()(Input2_& input, Index overall_line_count) {
1415 char firstchar = input.get();
1416 bool negative = (firstchar == '-');
1417 if (negative || firstchar == '+') {
1418 if (!(input.advance())) {
1419 throw std::runtime_error("premature termination of an integer on line " + std::to_string(overall_line_count + 1));
1420 }
1421 }
1422
1423 constexpr Type_ upper_limit = std::numeric_limits<Type_>::max();
1424 constexpr Type_ upper_limit_before_mult = upper_limit / 10;
1425 constexpr Type_ upper_limit_mod = upper_limit % 10;
1426 constexpr Type_ lower_limit = std::numeric_limits<Type_>::lowest();
1427 constexpr Type_ lower_limit_before_mult = lower_limit / 10;
1428 constexpr Type_ lower_limit_mod = -(lower_limit % 10);
1429
1430 Type_ val = 0;
1431 bool found = false;
1432 while (1) {
1433 char x = input.get();
1434 switch (x) {
1435 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1436 {
1437 Type_ delta = x - '0';
1438 // We have to handle negative and positive cases separately as they overflow at different thresholds.
1439 if (negative) {
1440 // Structuring the conditionals so that it's most likely to short-circuit after only testing the first one.
1441 if (val <= lower_limit_before_mult && !(val == lower_limit_before_mult && delta <= lower_limit_mod)) {
1442 throw std::runtime_error("integer underflow on line " + std::to_string(overall_line_count + 1));
1443 }
1444 val *= 10;
1445 val -= delta;
1446 } else {
1447 if (val >= upper_limit_before_mult && !(val == upper_limit_before_mult && delta <= upper_limit_mod)) {
1448 throw std::runtime_error("integer overflow on line " + std::to_string(overall_line_count + 1));
1449 }
1450 val *= 10;
1451 val += delta;
1452 }
1453 }
1454 found = true;
1455 break;
1456 case ' ': case '\t': case '\r':
1457 if (!advance_and_chomp(input)) { // skipping past the current position before chomping.
1458 return ParseInfo<Type_>(val, false);
1459 }
1460 if (input.get() != '\n') {
1461 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1462 }
1463 return ParseInfo<Type_>(val, input.advance()); // move past the newline.
1464 case '\n':
1465 // This check only needs to be put here, as all blanks should be chomped before calling
1466 // this function; so we must start on a non-blank character. This starting character is either:
1467 // - a digit, in which case found = true and this check is unnecessary.
1468 // - a non-newline non-digit, in case we throw.
1469 // - a newline, in which case we arrive here.
1470 if (!found) {
1471 throw std::runtime_error("empty integer field on line " + std::to_string(overall_line_count + 1));
1472 }
1473 return ParseInfo<Type_>(val, input.advance()); // move past the newline.
1474 default:
1475 throw std::runtime_error("expected an integer value on line " + std::to_string(overall_line_count + 1));
1476 }
1477
1478 if (!(input.advance())) {
1479 break;
1480 }
1481 }
1482
1483 return ParseInfo<Type_>(val, false);
1484 }
1485 };
1486
1487public:
1501 template<typename Type_ = int, class Store_>
1502 bool scan_integer(Store_ store) {
1503 check_preamble();
1504
1505 auto wrapped_store = [&](Index r, Index c, Type_ val) -> bool {
1506 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, Type_>::type, bool>::value) {
1507 return store(r, c, val);
1508 } else {
1509 store(r, c, val);
1510 return true;
1511 }
1512 };
1513
1514 if (my_details.format == Format::COORDINATE) {
1515 if (my_details.object == Object::MATRIX) {
1516 return scan_matrix_coordinate_non_pattern<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1517 } else {
1518 return scan_vector_coordinate_non_pattern<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1519 }
1520 } else {
1521 if (my_details.object == Object::MATRIX) {
1522 return scan_matrix_array<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1523 } else {
1524 return scan_vector_array<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1525 }
1526 }
1527 }
1528
1529private:
1530 template<bool last_, typename Type_, typename Input2_>
1531 static typename std::conditional<last_, ParseInfo<Type_>, Type_>::type parse_special(Input2_& input, bool negative, bool check_inf, Index overall_line_count) {
1532 auto what = [&]() -> std::string {
1533 if (check_inf) {
1534 return std::string("infinity");
1535 } else {
1536 return std::string("NaN");
1537 }
1538 };
1539
1540 auto check = [&](char lower, char upper) -> void {
1541 if (!input.advance()) {
1542 throw std::runtime_error("unexpected termination of " + what() + " on line " + std::to_string(overall_line_count + 1));
1543 }
1544 char current = input.get();
1545 if (current != lower && current != upper) {
1546 throw std::runtime_error("unexpected character when parsing " + what() + " on line " + std::to_string(overall_line_count + 1));
1547 }
1548 };
1549
1550 bool remaining = true;
1551 if (check_inf) {
1552 // We already know that we're starting with 'i', so we can proceed to the remaining two letters.
1553 check('n', 'N');
1554 check('f', 'F');
1555
1556 // Checking if there's any more letters.
1557 remaining = input.advance();
1558 if (remaining) {
1559 char current = input.get();
1560 if (current != '\n' && current != ' ' && current != '\t' && current != '\r') {
1561 if (current != 'i' && current != 'I') {
1562 throw std::runtime_error("unexpected character when parsing " + what() + " on line " + std::to_string(overall_line_count + 1));
1563 }
1564 check('n', 'N');
1565 check('i', 'I');
1566 check('t', 'T');
1567 check('y', 'Y');
1568 remaining = input.advance();
1569 }
1570 }
1571 } else {
1572 // We already know that we're starting with 'n', so we can proceed to the remaining two letters.
1573 check('a', 'A');
1574 check('n', 'N');
1575 remaining = input.advance();
1576 }
1577
1578 if (remaining) {
1579 // Using a switch for consistency with parse_real().
1580 switch(input.get()) {
1581 case ' ': case '\t': case '\r':
1582 if (!advance_and_chomp(input)) {
1583 if constexpr(last_) {
1584 remaining = false;
1585 break;
1586 }
1587 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1588 }
1589 if constexpr(last_) {
1590 if (input.get() != '\n') {
1591 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1592 }
1593 remaining = input.advance(); // advance past the newline
1594 }
1595 break;
1596 case '\n':
1597 if constexpr(last_) {
1598 remaining = input.advance(); // advance past the newline.
1599 break;
1600 }
1601 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1602 default:
1603 throw std::runtime_error("unexpected character when parsing " + what() + " on line " + std::to_string(overall_line_count + 1));
1604 }
1605 } else {
1606 if constexpr(!last_) {
1607 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1608 }
1609 }
1610
1611 Type_ value;
1612 if (check_inf) {
1613 if constexpr(!std::numeric_limits<Type_>::has_infinity) {
1614 throw std::runtime_error("requested type does not support " + what());
1615 }
1616 value = std::numeric_limits<Type_>::infinity();
1617 } else {
1618 if constexpr(!std::numeric_limits<Type_>::has_quiet_NaN) {
1619 throw std::runtime_error("requested type does not support " + what());
1620 }
1621 value = std::numeric_limits<Type_>::quiet_NaN();
1622 }
1623 if (negative) {
1624 value *= -1;
1625 }
1626
1627 if constexpr(last_) {
1628 ParseInfo<Type_> output;
1629 output.value = value;
1630 output.remaining = remaining;
1631 return output;
1632 } else {
1633 return value;
1634 }
1635 }
1636
1637 template<bool last_, typename Type_, typename Input2_>
1638 static typename std::conditional<last_, ParseInfo<Type_>, Type_>::type parse_real(Input2_& input, Index overall_line_count) {
1639 char firstchar = input.get();
1640 bool negative = (firstchar == '-');
1641 if (negative || firstchar == '+') {
1642 if (!(input.advance())) {
1643 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1644 }
1645 }
1646
1647 // Check for specials.
1648 switch (input.get()) {
1649 case 'i': case 'I':
1650 return parse_special<last_, Type_>(input, negative, true, overall_line_count);
1651 case 'n': case 'N':
1652 return parse_special<last_, Type_>(input, negative, false, overall_line_count);
1653 };
1654
1655 // Processing the integer component.
1656 Type_ value = 0;
1657 bool found = false;
1658 bool remaining = true;
1659
1660 while (1) {
1661 char val = input.get();
1662 switch(val) {
1663 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1664 value *= 10;
1665 value += val - '0';
1666 found = true;
1667 break;
1668 case ' ': case '\t': case '\r':
1669 if (!advance_and_chomp(input)) {
1670 if constexpr(last_) {
1671 remaining = false;
1672 goto final_processing;
1673 }
1674 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1675 }
1676 if constexpr(last_) {
1677 if (input.get() != '\n') {
1678 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1679 }
1680 remaining = input.advance(); // advance past the newline
1681 }
1682 goto final_processing;
1683 case '\n':
1684 if constexpr(last_) {
1685 remaining = input.advance(); // advance past the newline
1686 goto final_processing;
1687 }
1688 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1689 case '.':
1690 if (!input.advance()) {
1691 if constexpr(last_) {
1692 remaining = false;
1693 goto final_processing;
1694 }
1695 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1696 }
1697 goto decimal_processing;
1698 case 'e': case 'E':
1699 if (!input.advance()) {
1700 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1701 }
1702 goto exponent_processing;
1703 default:
1704 throw std::runtime_error("unrecognized character in real number on line " + std::to_string(overall_line_count + 1));
1705 }
1706
1707 if (!(input.advance())) {
1708 if constexpr(last_) {
1709 remaining = input.advance();
1710 goto final_processing;
1711 }
1712 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1713 }
1714 }
1715
1716 // Processing the decimal component.
1717decimal_processing:
1718 {
1719 Type_ multiplier = 1;
1720 while (1) {
1721 char val = input.get();
1722 switch(val) {
1723 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1724 multiplier *= 10;
1725 value += (val - '0') / multiplier;
1726 found = true;
1727 break;
1728 case ' ': case '\t': case '\r':
1729 if (!advance_and_chomp(input)) {
1730 if constexpr(last_) {
1731 remaining = false;
1732 goto final_processing;
1733 }
1734 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1735 }
1736 if constexpr(last_) {
1737 if (input.get() != '\n') {
1738 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1739 }
1740 remaining = input.advance(); // advance past the newline
1741 }
1742 goto final_processing;
1743 case '\n':
1744 if constexpr(last_) {
1745 remaining = input.advance(); // advance past the newline
1746 goto final_processing;
1747 }
1748 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1749 case 'e': case 'E':
1750 if (!input.advance()) {
1751 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1752 }
1753 goto exponent_processing;
1754 default:
1755 throw std::runtime_error("unrecognized character in real number on line " + std::to_string(overall_line_count + 1));
1756 }
1757
1758 if (!(input.advance())) {
1759 if constexpr(last_) {
1760 remaining = input.advance();
1761 goto final_processing;
1762 }
1763 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1764 }
1765 }
1766 }
1767
1768 // Processing the exponent.
1769exponent_processing:
1770 {
1771 bool expnegative = (input.get() == '-');
1772 if (expnegative || input.get() == '+') {
1773 if (!(input.advance())) {
1774 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1775 }
1776 }
1777
1778 Type_ exponent = 0;
1779 bool expfound = false;
1780 while (1) {
1781 char val = input.get();
1782 switch(val) {
1783 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1784 exponent *= 10;
1785 exponent += (val - '0');
1786 expfound = true;
1787 break;
1788 case ' ': case '\t': case '\r':
1789 if (!advance_and_chomp(input)) {
1790 if constexpr(last_) {
1791 remaining = false;
1792 goto exponent_processing_finish;
1793 }
1794 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1795 }
1796 if constexpr(last_) {
1797 if (input.get() != '\n') {
1798 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1799 }
1800 remaining = input.advance(); // advance past the newline
1801 }
1802 goto exponent_processing_finish;
1803 case '\n':
1804 if constexpr(last_) {
1805 remaining = input.advance(); // advance past the newline
1806 goto exponent_processing_finish;
1807 }
1808 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1809 default:
1810 throw std::runtime_error("unrecognized character in real number on line " + std::to_string(overall_line_count + 1));
1811 }
1812
1813 if (!(input.advance())) {
1814 if constexpr(last_) {
1815 remaining = input.advance();
1816 goto exponent_processing_finish;
1817 }
1818 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1819 }
1820 }
1821
1822exponent_processing_finish:
1823 if (!expfound) {
1824 throw std::runtime_error("no digits in the decimal exponent on line " + std::to_string(overall_line_count + 1));
1825 }
1826 if (expnegative) {
1827 exponent *= -1;
1828 }
1829 value *= std::pow(10.0, exponent);
1830 }
1831
1832final_processing:
1833 if (!found) {
1834 throw std::runtime_error("no digits in real number on line " + std::to_string(overall_line_count + 1));
1835 }
1836 if (negative) {
1837 value *= -1;
1838 }
1839
1840 if constexpr(last_) {
1841 ParseInfo<Type_> output;
1842 output.value = value;
1843 output.remaining = remaining;
1844 return output;
1845 } else {
1846 return value;
1847 }
1848 }
1849
1850 template<typename Type_>
1851 class RealFieldParser {
1852 public:
1853 template<class Input2_>
1854 ParseInfo<Type_> operator()(Input2_& input, Index overall_line_count) {
1855 return parse_real<true, Type_>(input, overall_line_count);
1856 }
1857 };
1858
1859public:
1873 template<typename Type_ = double, class Store_>
1874 bool scan_real(Store_&& store) {
1875 check_preamble();
1876
1877 auto store_real = [&](Index r, Index c, Type_ val) -> bool {
1878 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, Type_>::type, bool>::value) {
1879 return store(r, c, val);
1880 } else {
1881 store(r, c, val);
1882 return true;
1883 }
1884 };
1885
1886 if (my_details.format == Format::COORDINATE) {
1887 if (my_details.object == Object::MATRIX) {
1888 return scan_matrix_coordinate_non_pattern<Type_, RealFieldParser<Type_> >(std::move(store_real));
1889 } else {
1890 return scan_vector_coordinate_non_pattern<Type_, RealFieldParser<Type_> >(std::move(store_real));
1891 }
1892 } else {
1893 if (my_details.object == Object::MATRIX) {
1894 return scan_matrix_array<Type_, RealFieldParser<Type_> >(std::move(store_real));
1895 } else {
1896 return scan_vector_array<Type_, RealFieldParser<Type_> >(std::move(store_real));
1897 }
1898 }
1899 }
1900
1915 template<typename Type_ = double, class Store_>
1916 bool scan_double(Store_ store) {
1917 return scan_real<Type_, Store_>(std::move(store));
1918 }
1919
1920private:
1921 template<typename InnerType_>
1922 class ComplexFieldParser {
1923 public:
1924 template<typename Input2_>
1925 ParseInfo<std::complex<InnerType_> > operator()(Input2_& input, Index overall_line_count) {
1926 auto first = parse_real<false, InnerType_>(input, overall_line_count);
1927 auto second = parse_real<true, InnerType_>(input, overall_line_count);
1928 ParseInfo<std::complex<InnerType_> > output;
1929 output.value.real(first);
1930 output.value.imag(second.value);
1931 output.remaining = second.remaining;
1932 return output;
1933 }
1934 };
1935
1936public:
1950 template<typename Type_ = double, class Store_>
1951 bool scan_complex(Store_ store) {
1952 check_preamble();
1953
1954 typedef std::complex<Type_> FullType;
1955 auto store_comp = [&](Index r, Index c, FullType val) -> bool {
1956 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, FullType>::type, bool>::value) {
1957 return store(r, c, val);
1958 } else {
1959 store(r, c, val);
1960 return true;
1961 }
1962 };
1963
1964 if (my_details.format == Format::COORDINATE) {
1965 if (my_details.object == Object::MATRIX) {
1966 return scan_matrix_coordinate_non_pattern<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1967 } else {
1968 return scan_vector_coordinate_non_pattern<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1969 }
1970 } else {
1971 if (my_details.object == Object::MATRIX) {
1972 return scan_matrix_array<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1973 } else {
1974 return scan_vector_array<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1975 }
1976 }
1977 }
1978
1994 template<typename Type_ = bool, class Store_>
1995 bool scan_pattern(Store_ store) {
1996 check_preamble();
1997 if (my_details.format != Format::COORDINATE) {
1998 throw std::runtime_error("'array' format for 'pattern' field is not supported");
1999 }
2000
2001 auto store_pat = [&](Index r, Index c) -> bool {
2002 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, bool>::type, bool>::value) {
2003 return store(r, c, true);
2004 } else {
2005 store(r, c, true);
2006 return true;
2007 }
2008 };
2009
2010 if (my_details.object == Object::MATRIX) {
2011 return scan_matrix_coordinate_pattern(std::move(store_pat));
2012 } else {
2013 return scan_vector_coordinate_pattern(std::move(store_pat));
2014 }
2015 }
2016};
2017
2018}
2019
2020#endif
Parse a matrix from a Matrix Market file.
Definition Parser.hpp:271
Index get_ncols() const
Definition Parser.hpp:765
bool scan_integer(Store_ store)
Definition Parser.hpp:1502
const MatrixDetails & get_banner() const
Definition Parser.hpp:577
bool scan_pattern(Store_ store)
Definition Parser.hpp:1995
bool scan_complex(Store_ store)
Definition Parser.hpp:1951
bool scan_real(Store_ &&store)
Definition Parser.hpp:1874
bool scan_double(Store_ store)
Definition Parser.hpp:1916
void scan_preamble()
Definition Parser.hpp:791
Parser(std::unique_ptr< Input_ > input, const ParserOptions &options)
Definition Parser.hpp:277
Index get_nrows() const
Definition Parser.hpp:751
Index get_nlines() const
Definition Parser.hpp:779
Classes and methods for parsing Matrix Market files.
unsigned long long Index
Definition Parser.hpp:33
Details extracted from the Matrix Market banner.
Definition utils.hpp:52
Symmetry symmetry
Definition utils.hpp:71
Format format
Definition utils.hpp:61
Object object
Definition utils.hpp:56
Field field
Definition utils.hpp:66
Options for the Parser constructor.
Definition Parser.hpp:38
std::size_t block_size
Definition Parser.hpp:49
int num_threads
Definition Parser.hpp:42
Utilities for matrix parsing.