eminem
Parse Matrix Market files in C++
Loading...
Searching...
No Matches
Parser.hpp
Go to the documentation of this file.
1#ifndef EMINEM_PARSER_HPP
2#define EMINEM_PARSER_HPP
3
4#include <vector>
5#include <string>
6#include <complex>
7#include <type_traits>
8#include <stdexcept>
9#include <memory>
10#include <thread>
11#include <mutex>
12#include <condition_variable>
13#include <limits>
14
16#include "byteme/PerByte.hpp"
17
18#include "utils.hpp"
19
26namespace eminem {
27
// Unsigned integer type used throughout this file for row/column indices, dimensions and line counts.
using Index = unsigned long long;
33
41 int num_threads = 1;
42
48 std::size_t block_size = 65536;
49};
50
/**
 * Small pool of worker threads, each owning one `Workspace_`.
 *
 * `run()` hands jobs to the workers in strict round-robin order and merges
 * each worker's result in the same order, so merges always happen in order
 * of submission.
 *
 * @tparam Workspace_ Default-constructible per-thread state that is filled by
 * the "create" step, processed by the "run" step and consumed by the "merge" step.
 */
template<typename Workspace_>
class ThreadPool {
public:
    /**
     * Starts `num_threads` workers and blocks until each has registered its helper.
     *
     * @param run_job Callable invoked as `run_job(workspace)` on a worker thread.
     * It is copied into every worker. Any exception it throws is captured and
     * rethrown later from `run()`.
     * @param num_threads Number of worker threads to start.
     */
    template<typename RunJob_>
    ThreadPool(RunJob_ run_job, int num_threads) : my_helpers(num_threads) {
        std::mutex init_mut;
        std::condition_variable init_cv;
        int num_initialized = 0;

        my_threads.reserve(num_threads);
        for (int t = 0; t < num_threads; ++t) {
            // 'run_job' is captured by copy as the constructor argument will be gone once this constructor finishes.
            my_threads.emplace_back([run_job,this,&init_mut,&init_cv,&num_initialized](int thread) -> void {
                Helper env; // allocating this locally within each thread to reduce the risk of false sharing.
                my_helpers[thread] = &env;
                {
                    // Notifying under the lock, as 'init_cv' is local to the constructor.
                    std::lock_guard lck(init_mut);
                    ++num_initialized;
                    init_cv.notify_one();
                }

                while (1) {
                    // The helper's mutex is held for the whole iteration, including the job itself.
                    std::unique_lock lck(env.mut);
                    env.cv.wait(lck, [&]() -> bool { return env.input_ready; });
                    if (env.terminated) {
                        return;
                    }
                    env.input_ready = false;

                    try {
                        run_job(env.work);
                    } catch (...) {
                        // Only the first error across all workers is retained.
                        std::lock_guard elck(my_error_mut);
                        if (!my_error) {
                            my_error = std::current_exception();
                        }
                    }

                    env.has_output = true;
                    env.available = true;
                    env.cv.notify_one();
                }
            }, t);
        }

        // Only returning once all threads (and their specific mutexes) are initialized.
        {
            std::unique_lock ilck(init_mut);
            init_cv.wait(ilck, [&]() -> bool { return num_initialized == num_threads; });
        }
    }

    /**
     * Asks every worker to terminate and joins all threads.
     */
    ~ThreadPool() {
        for (auto envptr : my_helpers) {
            auto& env = *envptr;
            // The notification is issued while the lock is still held. Each Helper lives on
            // its worker's stack, and the worker returns (destroying the Helper) as soon as
            // it can re-acquire the mutex and observe 'terminated'. Notifying after releasing
            // the lock could therefore touch an already-destroyed condition variable.
            std::lock_guard lck(env.mut);
            env.terminated = true;
            env.input_ready = true;
            env.cv.notify_one();
        }
        for (auto& thread : my_threads) {
            thread.join();
        }
    }

private:
    std::vector<std::thread> my_threads;

    // Per-worker synchronization state plus the workspace itself.
    struct Helper {
        std::mutex mut;
        std::condition_variable cv;
        bool input_ready = false; // set by run()/destructor to wake the worker.
        bool available = true;    // set by the worker once it is idle again.
        bool has_output = false;  // set by the worker after a job; cleared by run() after merging.
        bool terminated = false;  // set by the destructor.
        Workspace_ work;
    };
    std::vector<Helper*> my_helpers;

    std::mutex my_error_mut;
    std::exception_ptr my_error;

public:
    /**
     * Runs jobs until `create_job` reports that no more are available.
     *
     * @param create_job Callable invoked as `create_job(workspace)` on the calling thread to
     * populate a workspace. It returns `false` if this is the last job; that job is still submitted.
     * @param merge_job Callable invoked as `merge_job(workspace)` on the calling thread with a
     * processed workspace. Returning `false` requests an early quit.
     * @return `true` if all jobs were created, run and merged; `false` if `merge_job` requested an early quit.
     * Any exception captured from a worker's job is rethrown here.
     */
    template<typename CreateJob_, typename MergeJob_>
    bool run(CreateJob_ create_job, MergeJob_ merge_job) {
        auto num_threads = my_threads.size();
        bool finished = false;
        decltype(num_threads) thread = 0, finished_count = 0;

        // We submit jobs by cycling through all threads, then we merge their results in order of submission.
        // This is a less efficient worksharing scheme but it guarantees the same order of merges.
        while (1) {
            auto& env = *(my_helpers[thread]);
            std::unique_lock lck(env.mut);
            env.cv.wait(lck, [&]() -> bool { return env.available; });

            {
                std::lock_guard elck(my_error_mut);
                if (my_error) {
                    std::rethrow_exception(my_error);
                }
            }
            env.available = false;

            if (env.has_output) {
                // If the user requests an early quit from the merge job,
                // there's no point processing the later merge jobs from
                // other threads, so we just break out at this point.
                if (!merge_job(env.work)) {
                    return false;
                }
                env.has_output = false;
            }

            if (finished) {
                // Go through all threads one last time, making sure all results are merged.
                ++finished_count;
                if (finished_count == num_threads) {
                    break;
                }
            } else {
                finished = !create_job(env.work);
                env.input_ready = true;
                lck.unlock();
                env.cv.notify_one();
            }

            ++thread;
            if (thread == num_threads) {
                thread = 0;
            }
        }

        return true;
    }
};
192
/**
 * Reads roughly `block_size` bytes from `input` into `buffer`, then keeps
 * appending single bytes until the buffer ends on a newline.
 *
 * @param input Source exposing `extract(n, ptr)` (returning a pair whose `first` is the number of
 * bytes written and whose `second` is used here as "more input remains"), `get()` and `advance()`.
 * @param[out] buffer Overwritten with the bytes that were read.
 * @param block_size Number of bytes to request in the initial extraction.
 * @return `true` if the buffer ends on a newline and `input` still has bytes left; `false` otherwise.
 */
template<class Input_>
bool fill_to_next_newline(Input_& input, std::vector<char>& buffer, std::size_t block_size) {
    buffer.resize(block_size);
    const auto extracted = input.extract(block_size, buffer.data());
    buffer.resize(extracted.first);

    const bool more_input = extracted.second;
    if (buffer.empty() || !more_input) {
        return false;
    }

    // Top up the block one byte at a time until it finishes on a line boundary.
    for (char tail = buffer.back(); tail != '\n'; ) {
        tail = input.get();
        buffer.push_back(tail);
        if (!input.advance()) {
            return false;
        }
    }
    return true;
}
211
/**
 * @param buffer Bytes to inspect.
 * @return Number of '\n' characters in `buffer`.
 */
inline std::size_t count_newlines(const std::vector<char>& buffer) {
    std::size_t total = 0;
    const std::size_t len = buffer.size();
    for (std::size_t i = 0; i < len; ++i) {
        if (buffer[i] == '\n') {
            ++total;
        }
    }
    return total;
}
267template<class Input_>
268class Parser {
269public:
274 Parser(std::unique_ptr<Input_> input, const ParserOptions& options) :
275 my_input(std::move(input)),
276 my_nthreads(options.num_threads),
277 my_block_size(options.block_size)
278 {}
279
280private:
281 std::unique_ptr<Input_> my_input;
282 int my_nthreads;
283 std::size_t my_block_size;
284
285 Index my_current_line = 0;
286 MatrixDetails my_details;
287
/**
 * Skips horizontal blanks (space, tab, carriage return), starting from the current byte.
 *
 * @param input Source exposing `get()` and `advance()`.
 * @return `true` if `input` now sits on a non-blank byte; `false` if the input ran out first.
 */
template<typename Input2_>
static bool chomp(Input2_& input) {
    for (;;) {
        const char current = input.get();
        const bool is_blank = (current == ' ' || current == '\t' || current == '\r');
        if (!is_blank) {
            return true;
        }
        if (!input.advance()) {
            return false;
        }
    }
}
301
/**
 * Steps past the current byte and then skips any horizontal blanks that follow.
 * Intended for when the current byte is already known to be a blank, which
 * saves one redundant comparison relative to calling chomp() directly.
 *
 * @return `false` if the input ran out, otherwise the result of chomp().
 */
template<typename Input2_>
static bool advance_and_chomp(Input2_& input) {
    return input.advance() && chomp(input);
}
311
312 template<typename Input2_>
313 static bool skip_lines(Input2_& input, Index& current_line) {
314 // Skip comments and empty lines.
315 while (1) {
316 char x = input.get();
317 if (x == '%') {
318 do {
319 if (!(input.advance())) {
320 return false;
321 }
322 } while (input.get() != '\n');
323 } else if (x != '\n') {
324 break;
325 }
326
327 if (!input.advance()) { // move past the newline.
328 return false;
329 }
330 ++current_line;
331 }
332 return true;
333 }
334
335private:
336 bool my_passed_banner = false;
337
// Outcome of trying to match a fixed string in the input.
struct ExpectedMatch {
    ExpectedMatch() : found(false), newline(false), remaining(false) {}

    ExpectedMatch(bool found, bool newline, bool remaining) :
        found(found),
        newline(newline),
        remaining(remaining)
    {}

    bool found;     // whether the string was matched.
    bool newline;   // whether a newline was consumed after the match.
    bool remaining; // whether any input is left.
};
345
346 ExpectedMatch advance_past_expected_string() {
347 if (!(my_input->advance())) { // move off the last character.
348 return ExpectedMatch(true, false, false);
349 }
350
351 char next = my_input->get();
352 if (next == ' ' || next == '\t' || next == '\r') {
353 if (!advance_and_chomp(*my_input)) { // gobble up all of the remaining horizontal space.
354 return ExpectedMatch(true, false, false);
355 }
356 if (my_input->get() == '\n') {
357 bool remaining = my_input->advance(); // move past the newline for consistency with other functions.
358 return ExpectedMatch(true, true, remaining); // move past the newline for consistency with other functions.
359 }
360 return ExpectedMatch(true, false, true);
361
362 } else if (next == '\n') {
363 bool remaining = my_input->advance(); // move past the newline for consistency with other functions.
364 return ExpectedMatch(true, true, remaining);
365 }
366
367 // If the next character is not a space or whitespace, it's not a match.
368 return ExpectedMatch(false, true, true);
369 }
370
371 ExpectedMatch is_expected_string(const char* ptr, std::size_t len, std::size_t start) {
372 // It is assumed that the first 'start' characters of 'ptr' where
373 // already checked and matched before entering this function, and that
374 // 'my_input' is currently positioned at the start-th character, i.e.,
375 // 'ptr[start-1]' (and thus requires an advance() call before we can
376 // compare against 'ptr[start]').
377 for (std::size_t i = start; i < len; ++i) {
378 if (!my_input->advance()) {
379 return ExpectedMatch(false, false, false);
380 }
381 if (my_input->get() != ptr[i]) {
382 return ExpectedMatch(false, false, true);
383 }
384 }
385 return advance_past_expected_string();
386 }
387
388 ExpectedMatch is_expected_string(const char* ptr, std::size_t len) {
389 // Using a default start of 1, assuming that we've already compared
390 // the first character before entering this function.
391 return is_expected_string(ptr, len, 1);
392 }
393
394 bool parse_banner_object() {
395 ExpectedMatch res;
396
397 char x = my_input->get();
398 if (x == 'm') {
399 res = is_expected_string("matrix", 6);
400 my_details.object = Object::MATRIX;
401 } else if (x == 'v') {
402 res = is_expected_string("vector", 6);
403 my_details.object = Object::VECTOR;
404 }
405
406 if (!res.found) {
407 throw std::runtime_error("first banner field should be one of 'matrix' or 'vector'");
408 }
409 if (!res.remaining) {
410 throw std::runtime_error("end of file reached after the first banner field");
411 }
412
413 return res.newline;
414 }
415
416 bool parse_banner_format() {
417 ExpectedMatch res;
418
419 char x = my_input->get();
420 if (x == 'c') {
421 res = is_expected_string("coordinate", 10);
422 my_details.format = Format::COORDINATE;
423 } else if (x == 'a') {
424 res = is_expected_string("array", 5);
425 my_details.format = Format::ARRAY;
426 }
427
428 if (!res.found) {
429 throw std::runtime_error("second banner field should be one of 'coordinate' or 'array'");
430 }
431 if (!res.remaining) {
432 throw std::runtime_error("end of file reached after the second banner field");
433 }
434
435 return res.newline;
436 }
437
438 bool parse_banner_field() {
439 ExpectedMatch res;
440
441 char x = my_input->get();
442 if (x == 'i') {
443 res = is_expected_string("integer", 7);
444 my_details.field = Field::INTEGER;
445 } else if (x == 'd') {
446 res = is_expected_string("double", 6);
447 my_details.field = Field::DOUBLE;
448 } else if (x == 'c') {
449 res = is_expected_string("complex", 7);
450 my_details.field = Field::COMPLEX;
451 } else if (x == 'p') {
452 res = is_expected_string("pattern", 7);
453 my_details.field = Field::PATTERN;
454 } else if (x == 'r') {
455 res = is_expected_string("real", 4);
456 my_details.field = Field::REAL;
457 }
458
459 if (!res.found) {
460 throw std::runtime_error("third banner field should be one of 'real', 'integer', 'double', 'complex' or 'pattern'");
461 }
462 if (!res.remaining) {
463 throw std::runtime_error("end of file reached after the third banner field");
464 }
465
466 return res.newline;
467 }
468
469 bool parse_banner_symmetry() {
470 ExpectedMatch res;
471
472 char x = my_input->get();
473 if (x == 'g') {
474 res = is_expected_string("general", 7);
475 my_details.symmetry = Symmetry::GENERAL;
476 } else if (x == 'h') {
477 res = is_expected_string("hermitian", 9);
478 my_details.symmetry = Symmetry::HERMITIAN;
479 } else if (x == 's') {
480 if (my_input->advance()) {
481 char x = my_input->get();
482 if (x == 'k') {
483 res = is_expected_string("skew-symmetric", 14, 2);
484 my_details.symmetry = Symmetry::SKEW_SYMMETRIC;
485 } else {
486 res = is_expected_string("symmetric", 9, 2);
487 my_details.symmetry = Symmetry::SYMMETRIC;
488 }
489 }
490 }
491
492 if (!res.found) {
493 throw std::runtime_error("fourth banner field should be one of 'general', 'hermitian', 'skew-symmetric' or 'symmetric'");
494 }
495 if (!res.remaining) {
496 throw std::runtime_error("end of file reached after the fourth banner field");
497 }
498
499 return res.newline;
500 }
501
502 void scan_banner() {
503 if (my_passed_banner) {
504 throw std::runtime_error("banner has already been scanned");
505 }
506 if (!(my_input->valid())) {
507 throw std::runtime_error("failed to find banner line before end of file");
508 }
509 if (my_input->get() != '%') {
510 throw std::runtime_error("first line of the file should be the banner");
511 }
512
513 auto found_banner = is_expected_string("%%MatrixMarket", 14);
514 if (!found_banner.remaining) {
515 throw std::runtime_error("end of file reached before matching the banner");
516 }
517 if (!found_banner.found) {
518 throw std::runtime_error("first line of the file should be the banner");
519 }
520 if (found_banner.newline) {
521 throw std::runtime_error("end of line reached before matching the banner");
522 }
523
524 if (parse_banner_object()) {
525 throw std::runtime_error("end of line reached after the first banner field");
526 }
527 if (parse_banner_format()) {
528 throw std::runtime_error("end of line reached after the second banner field");
529 }
530
531 bool eol = false;
532 if (my_details.object == Object::MATRIX) {
533 if (parse_banner_field()) {
534 throw std::runtime_error("end of line reached after the third banner field");
535 }
536 eol = parse_banner_symmetry();
537 } else {
538 // The NIST spec doesn't say anything about symmetry for vector,
539 // and it doesn't really make sense anyway. We'll just set it to
540 // general and hope for the best.
541 my_details.symmetry = Symmetry::GENERAL;
542
543 // No need to throw on newline because this might be the last field AFAICT.
544 eol = parse_banner_field();
545 }
546
547 my_passed_banner = true;
548
549 // Ignoring all other fields until the newline. We can use a do/while
550 // to skip the initial comparison because we know that the current
551 // character cannot be a newline if eol = false.
552 if (!eol) {
553 do {
554 if (!(my_input->advance())) {
555 throw std::runtime_error("end of file reached before the end of the banner line");
556 }
557 } while (my_input->get() != '\n');
558 my_input->advance(); // move past the newline.
559 }
560
561 ++my_current_line;
562 return;
563 }
564
565public:
572 const MatrixDetails& get_banner() const {
573 if (!my_passed_banner) {
574 throw std::runtime_error("banner has not yet been scanned");
575 }
576 return my_details;
577 }
578
579private:
580 // Only calls with 'last_ = true' need to know if there are any remaining bytes after the newline.
581 // This is because all non-last calls with no remaining bytes must have thrown.
582 struct NotLastSizeInfo {
583 Index index = 0;
584 };
585
586 struct LastSizeInfo {
587 Index index = 0;
588 bool remaining = false;
589 };
590
591 template<bool last_>
592 using SizeInfo = typename std::conditional<last_, LastSizeInfo, NotLastSizeInfo>::type;
593
594 template<bool last_, class Input2_>
595 static SizeInfo<last_> scan_integer_field(bool size, Input2_& input, Index overall_line_count) {
596 SizeInfo<last_> output;
597 bool found = false;
598
599 auto what = [&]() -> std::string {
600 if (size) {
601 return "size";
602 } else {
603 return "index";
604 }
605 };
606
607 constexpr Index max_limit = std::numeric_limits<Index>::max();
608 constexpr Index max_limit_before_mult = max_limit / 10;
609 constexpr Index max_limit_mod = max_limit % 10;
610
611 while (1) {
612 char x = input.get();
613 switch(x) {
614 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
615 {
616 Index delta = x - '0';
617 // Structuring the conditionals so that it's most likely to short-circuit after only testing the first one.
618 if (output.index >= max_limit_before_mult && !(output.index == max_limit_before_mult && delta <= max_limit_mod)) {
619 throw std::runtime_error("integer overflow in " + what() + " field on line " + std::to_string(overall_line_count + 1));
620 }
621 output.index *= 10;
622 output.index += delta;
623 }
624 found = true;
625 break;
626 case '\n':
627 // This check only needs to be put here, as all blanks should be chomped before calling
628 // this function; so we must start on a non-blank character. This starting character is either:
629 // - a digit, in which case found = true and this check is unnecessary.
630 // - a non-newline non-digit, in case we throw.
631 // - a newline, in which case we arrive here.
632 if (!found) {
633 throw std::runtime_error("empty " + what() + " field on line " + std::to_string(overall_line_count + 1));
634 }
635 if constexpr(last_) {
636 output.remaining = input.advance(); // advance past the newline.
637 return output;
638 }
639 throw std::runtime_error("unexpected newline when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
640 case ' ': case '\t': case '\r':
641 if (!advance_and_chomp(input)) { // skipping the current and subsequent blanks.
642 if constexpr(last_) {
643 return output;
644 } else {
645 throw std::runtime_error("unexpected end of file when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
646 }
647 }
648 if constexpr(last_) {
649 if (input.get() != '\n') {
650 throw std::runtime_error("expected newline after the last " + what() + " field on line " + std::to_string(overall_line_count + 1));
651 }
652 output.remaining = input.advance(); // advance past the newline.
653 }
654 return output;
655 default:
656 throw std::runtime_error("unexpected character when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
657 }
658
659 if (!(input.advance())) { // moving past the current digit.
660 if constexpr(last_) {
661 break;
662 } else {
663 throw std::runtime_error("unexpected end of file when parsing " + what() + " field on line " + std::to_string(overall_line_count + 1));
664 }
665 }
666 }
667
668 return output;
669 }
670
671 template<bool last_, class Input2_>
672 static SizeInfo<last_> scan_size_field(Input2_& input, Index overall_line_count) {
673 return scan_integer_field<last_>(true, input, overall_line_count);
674 }
675
676 template<bool last_, class Input2_>
677 static SizeInfo<last_> scan_index_field(Input2_& input, Index overall_line_count) {
678 return scan_integer_field<last_>(false, input, overall_line_count);
679 }
680
681private:
682 bool my_passed_size = false;
683 Index my_nrows = 0, my_ncols = 0, my_nlines = 0;
684
685 void scan_size() {
686 if (!(my_input->valid())) {
687 throw std::runtime_error("failed to find size line before end of file");
688 }
689
690 // Handling stray comments, empty lines, and leading whitespace.
691 if (!skip_lines(*my_input, my_current_line)) {
692 throw std::runtime_error("failed to find size line before end of file");
693 }
694 if (!chomp(*my_input)) {
695 throw std::runtime_error("expected at least one size field on line " + std::to_string(my_current_line + 1));
696 }
697
698 if (my_details.object == Object::MATRIX) {
699 if (my_details.format == Format::COORDINATE) {
700 auto first_field = scan_size_field<false>(*my_input, my_current_line);
701 my_nrows = first_field.index;
702
703 auto second_field = scan_size_field<false>(*my_input, my_current_line);
704 my_ncols = second_field.index;
705
706 auto third_field = scan_size_field<true>(*my_input, my_current_line);
707 my_nlines = third_field.index;
708
709 } else { // i.e., my_details.format == Format::ARRAY
710 auto first_field = scan_size_field<false>(*my_input, my_current_line);
711 my_nrows = first_field.index;
712
713 auto second_field = scan_size_field<true>(*my_input, my_current_line);
714 my_ncols = second_field.index;
715 my_nlines = my_nrows * my_ncols;
716 }
717
718 } else {
719 if (my_details.format == Format::COORDINATE) {
720 auto first_field = scan_size_field<false>(*my_input, my_current_line);
721 my_nrows = first_field.index;
722
723 auto second_field = scan_size_field<true>(*my_input, my_current_line);
724 my_nlines = second_field.index;
725
726 } else { // i.e., my_details.format == Format::ARRAY
727 auto first_field = scan_size_field<true>(*my_input, my_current_line);
728 my_nlines = first_field.index;
729 my_nrows = my_nlines;
730 }
731 my_ncols = 1;
732 }
733
734 ++my_current_line;
735 my_passed_size = true;
736 }
737
738public:
746 Index get_nrows() const {
747 if (!my_passed_size) {
748 throw std::runtime_error("size line has not yet been scanned");
749 }
750 return my_nrows;
751 }
752
760 Index get_ncols() const {
761 if (!my_passed_size) {
762 throw std::runtime_error("size line has not yet been scanned");
763 }
764 return my_ncols;
765 }
766
775 if (!my_passed_size) {
776 throw std::runtime_error("size line has not yet been scanned");
777 }
778 return my_nlines;
779 }
780
781public:
787 scan_banner();
788 scan_size();
789 return;
790 }
791
792private:
// Value parsed from the last field of a data line, plus whether any input is left
// after that line. The default constructor leaves both members uninitialized.
template<typename Type_>
struct ParseInfo {
    Type_ value;
    bool remaining;

    ParseInfo() = default;

    ParseInfo(Type_ value, bool remaining) :
        value(value),
        remaining(remaining)
    {}
};
800
801 template<typename Workspace_>
802 bool configure_parallel_workspace(Workspace_& work) {
803 bool available = fill_to_next_newline(*my_input, work.buffer, my_block_size);
804 work.contents.clear();
805 work.overall_line = my_current_line;
806 my_current_line += count_newlines(work.buffer);
807 return available;
808 }
809
810 void check_num_lines_loop(Index data_line_count) const {
811 if (data_line_count >= my_nlines) {
812 throw std::runtime_error("more lines present than specified in the header (" + std::to_string(data_line_count) + " versus " + std::to_string(my_nlines) + ")");
813 }
814 }
815
816 void check_num_lines_final(bool finished, Index data_line_count) const {
817 if (finished) {
818 if (data_line_count != my_nlines) {
819 // Must be fewer, otherwise we would have triggered the error in check_num_lines_loop() during iteration.
820 throw std::runtime_error("fewer lines present than specified in the header (" + std::to_string(data_line_count) + " versus " + std::to_string(my_nlines) + ")");
821 }
822 }
823 }
824
825private:
826 void check_matrix_coordinate_line(Index currow, Index curcol, Index overall_line_count) const {
827 if (!currow) {
828 throw std::runtime_error("row index must be positive on line " + std::to_string(overall_line_count + 1));
829 }
830 if (currow > my_nrows) {
831 throw std::runtime_error("row index out of range on line " + std::to_string(overall_line_count + 1));
832 }
833 if (!curcol) {
834 throw std::runtime_error("column index must be positive on line " + std::to_string(overall_line_count + 1));
835 }
836 if (curcol > my_ncols) {
837 throw std::runtime_error("column index out of range on line " + std::to_string(overall_line_count + 1));
838 }
839 }
840
841 template<typename Type_, class Input2_, typename FieldParser_, class WrappedStore_>
842 bool scan_matrix_coordinate_non_pattern_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
843 bool valid = input.valid();
844 while (valid) {
845 // Handling stray comments, empty lines, and leading spaces.
846 if (!skip_lines(input, overall_line_count)) {
847 break;
848 }
849 if (!chomp(input)) {
850 throw std::runtime_error("expected at least three fields for a coordinate matrix on line " + std::to_string(overall_line_count + 1));
851 }
852
853 auto first_field = scan_index_field<false>(input, overall_line_count);
854 auto second_field = scan_index_field<false>(input, overall_line_count);
855 check_matrix_coordinate_line(first_field.index, second_field.index, overall_line_count);
856
857 // 'fparser' should leave 'input' at the start of the next line, if any exists.
858 ParseInfo<Type_> res = fparser(input, overall_line_count);
859 if (!wstore(first_field.index, second_field.index, res.value)) {
860 return false;
861 }
862 ++overall_line_count;
863 valid = res.remaining;
864 }
865
866 return true;
867 }
868
869 template<typename Type_, class FieldParser_, class Store_>
870 bool scan_matrix_coordinate_non_pattern(Store_ store) {
871 bool finished = false;
872 Index current_data_line = 0;
873
874 if (my_nthreads == 1) {
875 FieldParser_ fparser;
876 finished = scan_matrix_coordinate_non_pattern_base<Type_>(
877 *my_input,
878 my_current_line,
879 fparser,
880 [&](Index r, Index c, Type_ value) -> bool {
881 check_num_lines_loop(current_data_line);
882 ++current_data_line;
883 return store(r, c, value);
884 }
885 );
886
887 } else {
888 struct Workspace {
889 std::vector<char> buffer;
890 FieldParser_ fparser;
891 std::vector<std::tuple<Index, Index, Type_> > contents;
892 Index overall_line;
893 };
894
895 ThreadPool<Workspace> tp(
896 [&](Workspace& work) -> bool {
897 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
899 return scan_matrix_coordinate_non_pattern_base<Type_>(
900 pb,
901 work.overall_line,
902 work.fparser,
903 [&](Index r, Index c, Type_ value) -> bool {
904 work.contents.emplace_back(r, c, value);
905 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
906 }
907 );
908 },
909 my_nthreads
910 );
911
912 finished = tp.run(
913 [&](Workspace& work) -> bool {
914 return configure_parallel_workspace(work);
915 },
916 [&](Workspace& work) -> bool {
917 for (const auto& con : work.contents) {
918 check_num_lines_loop(current_data_line); // defer check here for the correctly sync'd value of current_data_line.
919 if (!store(std::get<0>(con), std::get<1>(con), std::get<2>(con))) {
920 return false;
921 }
922 ++current_data_line;
923 }
924 return true;
925 }
926 );
927 }
928
929 check_num_lines_final(finished, current_data_line);
930 return finished;
931 }
932
933private:
934 template<class Input2_, class WrappedStore_>
935 bool scan_matrix_coordinate_pattern_base(Input2_& input, Index& overall_line_count, WrappedStore_ wstore) const {
936 bool valid = input.valid();
937 while (valid) {
938 // Handling stray comments, empty lines, and leading spaces.
939 if (!skip_lines(input, overall_line_count)) {
940 break;
941 }
942 if (!chomp(input)) {
943 throw std::runtime_error("expected two fields for a pattern matrix on line " + std::to_string(overall_line_count + 1));
944 }
945
946 auto first_field = scan_index_field<false>(input, overall_line_count);
947 auto second_field = scan_index_field<true>(input, overall_line_count);
948 check_matrix_coordinate_line(first_field.index, second_field.index, overall_line_count);
949
950 if (!wstore(first_field.index, second_field.index)) {
951 return false;
952 }
953 ++overall_line_count;
954 valid = second_field.remaining;
955 }
956
957 return true;
958 }
959
960 template<class Store_>
961 bool scan_matrix_coordinate_pattern(Store_ store) {
962 bool finished = false;
963 Index current_data_line = 0;
964
965 if (my_nthreads == 1) {
966 finished = scan_matrix_coordinate_pattern_base(
967 *my_input,
968 my_current_line,
969 [&](Index r, Index c) -> bool {
970 check_num_lines_loop(current_data_line);
971 ++current_data_line;
972 return store(r, c);
973 }
974 );
975
976 } else {
977 struct Workspace {
978 std::vector<char> buffer;
979 std::vector<std::tuple<Index, Index> > contents;
980 Index overall_line;
981 };
982
983 ThreadPool<Workspace> tp(
984 [&](Workspace& work) -> bool {
985 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
987 return scan_matrix_coordinate_pattern_base(
988 pb,
989 work.overall_line,
990 [&](Index r, Index c) -> bool {
991 work.contents.emplace_back(r, c);
992 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
993 }
994 );
995 },
996 my_nthreads
997 );
998
999 finished = tp.run(
1000 [&](Workspace& work) -> bool {
1001 return configure_parallel_workspace(work);
1002 },
1003 [&](Workspace& work) -> bool {
1004 for (const auto& con : work.contents) {
1005 check_num_lines_loop(current_data_line);
1006 if (!store(std::get<0>(con), std::get<1>(con))) {
1007 return false;
1008 }
1009 ++current_data_line;
1010 }
1011 return true;
1012 }
1013 );
1014 }
1015
1016 check_num_lines_final(finished, current_data_line);
1017 return finished;
1018 }
1019
1020private:
1021 void check_vector_coordinate_line(Index currow, Index overall_line_count) const {
1022 if (!currow) {
1023 throw std::runtime_error("row index must be positive on line " + std::to_string(overall_line_count + 1));
1024 }
1025 if (currow > my_nrows) {
1026 throw std::runtime_error("row index out of range on line " + std::to_string(overall_line_count + 1));
1027 }
1028 }
1029
1030 template<typename Type_, class Input2_, class FieldParser_, class WrappedStore_>
1031 bool scan_vector_coordinate_non_pattern_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
1032 bool valid = input.valid();
1033 while (valid) {
1034 // handling stray comments, empty lines, and leading spaces.
1035 if (!skip_lines(input, overall_line_count)) {
1036 break;
1037 }
1038 if (!chomp(input)) {
1039 throw std::runtime_error("expected at least two fields for a coordinate vector on line " + std::to_string(overall_line_count + 1));
1040 }
1041
1042 auto first_field = scan_index_field<false>(input, overall_line_count);
1043 check_vector_coordinate_line(first_field.index, overall_line_count);
1044
1045 // 'fparser' should leave 'input' at the start of the next line, if any exists.
1046 ParseInfo<Type_> res = fparser(input, overall_line_count);
1047 if (!wstore(first_field.index, res.value)) {
1048 return false;
1049 }
1050 ++overall_line_count;
1051 valid = res.remaining;
1052 }
1053
1054 return true;
1055 }
1056
1057 template<typename Type_, class FieldParser_, class Store_>
1058 bool scan_vector_coordinate_non_pattern(Store_ store) {
1059 bool finished = false;
1060 Index current_data_line = 0;
1061
1062 if (my_nthreads == 1) {
1063 FieldParser_ fparser;
1064 finished = scan_vector_coordinate_non_pattern_base<Type_>(
1065 *my_input,
1066 my_current_line,
1067 fparser,
1068 [&](Index r, Type_ value) -> bool {
1069 check_num_lines_loop(current_data_line);
1070 ++current_data_line;
1071 return store(r, 1, value);
1072 }
1073 );
1074
1075 } else {
1076 struct Workspace {
1077 std::vector<char> buffer;
1078 FieldParser_ fparser;
1079 std::vector<std::tuple<Index, Type_> > contents;
1080 Index overall_line;
1081 };
1082
1083 ThreadPool<Workspace> tp(
1084 [&](Workspace& work) -> bool {
1085 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
1087 return scan_vector_coordinate_non_pattern_base<Type_>(
1088 pb,
1089 work.overall_line,
1090 work.fparser,
1091 [&](Index r, Type_ value) -> bool {
1092 work.contents.emplace_back(r, value);
1093 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1094 }
1095 );
1096 },
1097 my_nthreads
1098 );
1099
1100 finished = tp.run(
1101 [&](Workspace& work) -> bool {
1102 return configure_parallel_workspace(work);
1103 },
1104 [&](Workspace& work) -> bool {
1105 for (const auto& con : work.contents) {
1106 check_num_lines_loop(current_data_line);
1107 if (!store(std::get<0>(con), 1, std::get<1>(con))) {
1108 return false;
1109 }
1110 ++current_data_line;
1111 }
1112 return true;
1113 }
1114 );
1115 }
1116
1117 check_num_lines_final(finished, current_data_line);
1118 return finished;
1119 }
1120
1121private:
1122 template<class Input2_, class WrappedStore_>
1123 bool scan_vector_coordinate_pattern_base(Input2_& input, Index& overall_line_count, WrappedStore_ wstore) const {
1124 bool valid = input.valid();
1125 while (valid) {
1126 // Handling stray comments, empty lines, and leading spaces.
1127 if (!skip_lines(input, overall_line_count)) {
1128 break;
1129 }
1130 if (!chomp(input)) {
1131 throw std::runtime_error("expected one field for a coordinate vector on line " + std::to_string(overall_line_count + 1));
1132 }
1133
1134 auto first_field = scan_index_field<true>(input, overall_line_count);
1135 check_vector_coordinate_line(first_field.index, overall_line_count);
1136
1137 if (!wstore(first_field.index)) {
1138 return false;
1139 }
1140 ++overall_line_count;
1141 valid = first_field.remaining;
1142 }
1143
1144 return true;
1145 }
1146
1147 template<class Store_>
1148 bool scan_vector_coordinate_pattern(Store_ store) {
1149 bool finished = false;
1150 Index current_data_line = 0;
1151
1152 if (my_nthreads == 1) {
1153 finished = scan_vector_coordinate_pattern_base(
1154 *my_input,
1155 my_current_line,
1156 [&](Index r) -> bool {
1157 check_num_lines_loop(current_data_line);
1158 ++current_data_line;
1159 return store(r, 1);
1160 }
1161 );
1162
1163 } else {
1164 struct Workspace {
1165 std::vector<char> buffer;
1166 std::vector<Index> contents;
1167 Index overall_line;
1168 };
1169
1170 ThreadPool<Workspace> tp(
1171 [&](Workspace& work) -> bool {
1172 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
1174 return scan_vector_coordinate_pattern_base(
1175 pb,
1176 work.overall_line,
1177 [&](Index r) -> bool {
1178 work.contents.emplace_back(r);
1179 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1180 }
1181 );
1182 },
1183 my_nthreads
1184 );
1185
1186 finished = tp.run(
1187 [&](Workspace& work) -> bool {
1188 return configure_parallel_workspace(work);
1189 },
1190 [&](Workspace& work) -> bool {
1191 for (const auto& r : work.contents) {
1192 check_num_lines_loop(current_data_line);
1193 if (!store(r, 1)) {
1194 return false;
1195 }
1196 ++current_data_line;
1197 }
1198 return true;
1199 }
1200 );
1201 }
1202
1203 check_num_lines_final(finished, current_data_line);
1204 return finished;
1205 }
1206
1207private:
1208 template<typename Type_, class Input2_, class FieldParser_, class WrappedStore_>
1209 bool scan_matrix_array_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
1210 bool valid = input.valid();
1211 while (valid) {
1212 // Handling stray comments, empty lines, and leading spaces.
1213 if (!skip_lines(input, overall_line_count)) {
1214 break;
1215 }
1216 if (!chomp(input)) {
1217 throw std::runtime_error("expected at least one field for an array matrix on line " + std::to_string(overall_line_count + 1));
1218 }
1219
1220 // 'fparser' should leave 'input' at the start of the next line, if any exists.
1221 ParseInfo<Type_> res = fparser(input, overall_line_count);
1222 if (!wstore(res.value)) {
1223 return false;
1224 }
1225 ++overall_line_count;
1226 valid = res.remaining;
1227 }
1228
1229 return true;
1230 }
1231
1232 template<typename Type_, class FieldParser_, class Store_>
1233 bool scan_matrix_array(Store_ store) {
1234 bool finished = false;
1235 Index current_data_line = 0;
1236
1237 Index currow = 1, curcol = 1;
1238 auto increment = [&]() {
1239 ++currow;
1240 if (currow > my_nrows) {
1241 ++curcol;
1242 currow = 1;
1243 }
1244 };
1245
1246 if (my_nthreads == 1) {
1247 FieldParser_ fparser;
1248 finished = scan_matrix_array_base<Type_>(
1249 *my_input,
1250 my_current_line,
1251 fparser,
1252 [&](Type_ value) -> bool {
1253 check_num_lines_loop(current_data_line);
1254 if (!store(currow, curcol, value)) {
1255 return false;
1256 }
1257 ++current_data_line;
1258 increment();
1259 return true;
1260 }
1261 );
1262
1263 } else {
1264 struct Workspace {
1265 std::vector<char> buffer;
1266 FieldParser_ fparser;
1267 std::vector<Type_> contents;
1268 Index overall_line;
1269 };
1270
1271 ThreadPool<Workspace> tp(
1272 [&](Workspace& work) -> bool {
1273 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
1275 return scan_matrix_array_base<Type_>(
1276 pb,
1277 work.overall_line,
1278 work.fparser,
1279 [&](Type_ value) -> bool {
1280 work.contents.emplace_back(value);
1281 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1282 }
1283 );
1284 },
1285 my_nthreads
1286 );
1287
1288 finished = tp.run(
1289 [&](Workspace& work) -> bool {
1290 return configure_parallel_workspace(work);
1291 },
1292 [&](Workspace& work) -> bool {
1293 for (const auto& val : work.contents) {
1294 check_num_lines_loop(current_data_line);
1295 if (!store(currow, curcol, val)) {
1296 return false;
1297 }
1298 ++current_data_line;
1299 increment();
1300 }
1301 return true;
1302 }
1303 );
1304 }
1305
1306 check_num_lines_final(finished, current_data_line);
1307 return finished;
1308 }
1309
1310private:
1311 template<typename Type_, class Input2_, class FieldParser_, class WrappedStore_>
1312 bool scan_vector_array_base(Input2_& input, Index& overall_line_count, FieldParser_& fparser, WrappedStore_ wstore) const {
1313 bool valid = input.valid();
1314 while (valid) {
1315 // Handling stray comments, empty lines, and leading spaces.
1316 if (!skip_lines(input, overall_line_count)) {
1317 break;
1318 }
1319 if (!chomp(input)) {
1320 throw std::runtime_error("expected at least one field for an array vector on line " + std::to_string(overall_line_count + 1));
1321 }
1322
1323 // 'fparser' should leave 'input' at the start of the next line, if any exists.
1324 ParseInfo<Type_> res = fparser(input, overall_line_count);
1325 if (!wstore(res.value)) {
1326 return false;
1327 }
1328 ++overall_line_count;
1329 valid = res.remaining;
1330 }
1331
1332 return true;
1333 }
1334
1335 template<typename Type_, class FieldParser_, class Store_>
1336 bool scan_vector_array(Store_ store) {
1337 bool finished = false;
1338 Index current_data_line = 0;
1339 if (my_nthreads == 1) {
1340 FieldParser_ fparser;
1341 finished = scan_vector_array_base<Type_>(
1342 *my_input,
1343 my_current_line,
1344 fparser,
1345 [&](Type_ value) -> bool {
1346 check_num_lines_loop(current_data_line);
1347 ++current_data_line;
1348 return store(current_data_line, 1, value);
1349 }
1350 );
1351
1352 } else {
1353 struct Workspace {
1354 std::vector<char> buffer;
1355 FieldParser_ fparser;
1356 std::vector<Type_> contents;
1357 Index overall_line;
1358 };
1359
1360 ThreadPool<Workspace> tp(
1361 [&](Workspace& work) -> bool {
1362 byteme::RawBufferReader reader(reinterpret_cast<const unsigned char*>(work.buffer.data()), work.buffer.size());
1364 return scan_vector_array_base<Type_>(
1365 pb,
1366 work.overall_line,
1367 work.fparser,
1368 [&](Type_ value) -> bool {
1369 work.contents.emplace_back(value);
1370 return true; // threads cannot quit early in their parallel sections; this (and thus scan_*_base) must always return true.
1371 }
1372 );
1373 },
1374 my_nthreads
1375 );
1376
1377 finished = tp.run(
1378 [&](Workspace& work) -> bool {
1379 return configure_parallel_workspace(work);
1380 },
1381 [&](Workspace& work) -> bool {
1382 for (const auto& val : work.contents) {
1383 check_num_lines_loop(current_data_line);
1384 ++current_data_line;
1385 if (!store(current_data_line, 1, val)) {
1386 return false;
1387 }
1388 }
1389 return true;
1390 }
1391 );
1392 }
1393
1394 check_num_lines_final(finished, current_data_line);
1395 return finished;
1396 }
1397
1398private:
1399 void check_preamble() const {
1400 if (!my_passed_banner || !my_passed_size) {
1401 throw std::runtime_error("banner or size lines have not yet been parsed");
1402 }
1403 }
1404
1405 template<typename Type_>
1406 class IntegerFieldParser {
1407 public:
1408 template<class Input2_>
1409 ParseInfo<Type_> operator()(Input2_& input, Index overall_line_count) {
1410 char firstchar = input.get();
1411 bool negative = (firstchar == '-');
1412 if (negative || firstchar == '+') {
1413 if (!(input.advance())) {
1414 throw std::runtime_error("premature termination of an integer on line " + std::to_string(overall_line_count + 1));
1415 }
1416 }
1417
1418 constexpr Type_ upper_limit = std::numeric_limits<Type_>::max();
1419 constexpr Type_ upper_limit_before_mult = upper_limit / 10;
1420 constexpr Type_ upper_limit_mod = upper_limit % 10;
1421 constexpr Type_ lower_limit = std::numeric_limits<Type_>::lowest();
1422 constexpr Type_ lower_limit_before_mult = lower_limit / 10;
1423 constexpr Type_ lower_limit_mod = -(lower_limit % 10);
1424
1425 Type_ val = 0;
1426 bool found = false;
1427 while (1) {
1428 char x = input.get();
1429 switch (x) {
1430 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1431 {
1432 Type_ delta = x - '0';
1433 // We have to handle negative and positive cases separately as they overflow at different thresholds.
1434 if (negative) {
1435 // Structuring the conditionals so that it's most likely to short-circuit after only testing the first one.
1436 if (val <= lower_limit_before_mult && !(val == lower_limit_before_mult && delta <= lower_limit_mod)) {
1437 throw std::runtime_error("integer underflow on line " + std::to_string(overall_line_count + 1));
1438 }
1439 val *= 10;
1440 val -= delta;
1441 } else {
1442 if (val >= upper_limit_before_mult && !(val == upper_limit_before_mult && delta <= upper_limit_mod)) {
1443 throw std::runtime_error("integer overflow on line " + std::to_string(overall_line_count + 1));
1444 }
1445 val *= 10;
1446 val += delta;
1447 }
1448 }
1449 found = true;
1450 break;
1451 case ' ': case '\t': case '\r':
1452 if (!advance_and_chomp(input)) { // skipping past the current position before chomping.
1453 return ParseInfo<Type_>(val, false);
1454 }
1455 if (input.get() != '\n') {
1456 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1457 }
1458 return ParseInfo<Type_>(val, input.advance()); // move past the newline.
1459 case '\n':
1460 // This check only needs to be put here, as all blanks should be chomped before calling
1461 // this function; so we must start on a non-blank character. This starting character is either:
1462 // - a digit, in which case found = true and this check is unnecessary.
1463 // - a non-newline non-digit, in case we throw.
1464 // - a newline, in which case we arrive here.
1465 if (!found) {
1466 throw std::runtime_error("empty integer field on line " + std::to_string(overall_line_count + 1));
1467 }
1468 return ParseInfo<Type_>(val, input.advance()); // move past the newline.
1469 default:
1470 throw std::runtime_error("expected an integer value on line " + std::to_string(overall_line_count + 1));
1471 }
1472
1473 if (!(input.advance())) {
1474 break;
1475 }
1476 }
1477
1478 return ParseInfo<Type_>(val, false);
1479 }
1480 };
1481
1482public:
1496 template<typename Type_ = int, class Store_>
1497 bool scan_integer(Store_ store) {
1498 check_preamble();
1499
1500 auto wrapped_store = [&](Index r, Index c, Type_ val) -> bool {
1501 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, Type_>::type, bool>::value) {
1502 return store(r, c, val);
1503 } else {
1504 store(r, c, val);
1505 return true;
1506 }
1507 };
1508
1509 if (my_details.format == Format::COORDINATE) {
1510 if (my_details.object == Object::MATRIX) {
1511 return scan_matrix_coordinate_non_pattern<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1512 } else {
1513 return scan_vector_coordinate_non_pattern<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1514 }
1515 } else {
1516 if (my_details.object == Object::MATRIX) {
1517 return scan_matrix_array<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1518 } else {
1519 return scan_vector_array<Type_, IntegerFieldParser<Type_> >(std::move(wrapped_store));
1520 }
1521 }
1522 }
1523
1524private:
1525 template<bool last_, typename Type_, typename Input2_>
1526 static typename std::conditional<last_, ParseInfo<Type_>, Type_>::type parse_special(Input2_& input, bool negative, bool check_inf, Index overall_line_count) {
1527 auto what = [&]() -> std::string {
1528 if (check_inf) {
1529 return std::string("infinity");
1530 } else {
1531 return std::string("NaN");
1532 }
1533 };
1534
1535 auto check = [&](char lower, char upper) -> void {
1536 if (!input.advance()) {
1537 throw std::runtime_error("unexpected termination of " + what() + " on line " + std::to_string(overall_line_count + 1));
1538 }
1539 char current = input.get();
1540 if (current != lower && current != upper) {
1541 throw std::runtime_error("unexpected character when parsing " + what() + " on line " + std::to_string(overall_line_count + 1));
1542 }
1543 };
1544
1545 bool remaining = true;
1546 if (check_inf) {
1547 // We already know that we're starting with 'i', so we can proceed to the remaining two letters.
1548 check('n', 'N');
1549 check('f', 'F');
1550
1551 // Checking if there's any more letters.
1552 remaining = input.advance();
1553 if (remaining) {
1554 char current = input.get();
1555 if (current != '\n' && current != ' ' && current != '\t' && current != '\r') {
1556 if (current != 'i' && current != 'I') {
1557 throw std::runtime_error("unexpected character when parsing " + what() + " on line " + std::to_string(overall_line_count + 1));
1558 }
1559 check('n', 'N');
1560 check('i', 'I');
1561 check('t', 'T');
1562 check('y', 'Y');
1563 remaining = input.advance();
1564 }
1565 }
1566 } else {
1567 // We already know that we're starting with 'n', so we can proceed to the remaining two letters.
1568 check('a', 'A');
1569 check('n', 'N');
1570 remaining = input.advance();
1571 }
1572
1573 if (remaining) {
1574 // Using a switch for consistency with parse_real().
1575 switch(input.get()) {
1576 case ' ': case '\t': case '\r':
1577 if (!advance_and_chomp(input)) {
1578 if constexpr(last_) {
1579 remaining = false;
1580 break;
1581 }
1582 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1583 }
1584 if constexpr(last_) {
1585 if (input.get() != '\n') {
1586 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1587 }
1588 remaining = input.advance(); // advance past the newline
1589 }
1590 break;
1591 case '\n':
1592 if constexpr(last_) {
1593 remaining = input.advance(); // advance past the newline.
1594 break;
1595 }
1596 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1597 default:
1598 throw std::runtime_error("unexpected character when parsing " + what() + " on line " + std::to_string(overall_line_count + 1));
1599 }
1600 } else {
1601 if constexpr(!last_) {
1602 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1603 }
1604 }
1605
1606 Type_ value;
1607 if (check_inf) {
1608 if constexpr(!std::numeric_limits<Type_>::has_infinity) {
1609 throw std::runtime_error("requested type does not support " + what());
1610 }
1611 value = std::numeric_limits<Type_>::infinity();
1612 } else {
1613 if constexpr(!std::numeric_limits<Type_>::has_quiet_NaN) {
1614 throw std::runtime_error("requested type does not support " + what());
1615 }
1616 value = std::numeric_limits<Type_>::quiet_NaN();
1617 }
1618 if (negative) {
1619 value *= -1;
1620 }
1621
1622 if constexpr(last_) {
1623 ParseInfo<Type_> output;
1624 output.value = value;
1625 output.remaining = remaining;
1626 return output;
1627 } else {
1628 return value;
1629 }
1630 }
1631
1632 template<bool last_, typename Type_, typename Input2_>
1633 static typename std::conditional<last_, ParseInfo<Type_>, Type_>::type parse_real(Input2_& input, Index overall_line_count) {
1634 char firstchar = input.get();
1635 bool negative = (firstchar == '-');
1636 if (negative || firstchar == '+') {
1637 if (!(input.advance())) {
1638 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1639 }
1640 }
1641
1642 // Check for specials.
1643 switch (input.get()) {
1644 case 'i': case 'I':
1645 return parse_special<last_, Type_>(input, negative, true, overall_line_count);
1646 case 'n': case 'N':
1647 return parse_special<last_, Type_>(input, negative, false, overall_line_count);
1648 };
1649
1650 // Processing the integer component.
1651 Type_ value = 0;
1652 bool found = false;
1653 bool remaining = true;
1654
1655 while (1) {
1656 char val = input.get();
1657 switch(val) {
1658 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1659 value *= 10;
1660 value += val - '0';
1661 found = true;
1662 break;
1663 case ' ': case '\t': case '\r':
1664 if (!advance_and_chomp(input)) {
1665 if constexpr(last_) {
1666 remaining = false;
1667 goto final_processing;
1668 }
1669 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1670 }
1671 if constexpr(last_) {
1672 if (input.get() != '\n') {
1673 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1674 }
1675 remaining = input.advance(); // advance past the newline
1676 }
1677 goto final_processing;
1678 case '\n':
1679 if constexpr(last_) {
1680 remaining = input.advance(); // advance past the newline
1681 goto final_processing;
1682 }
1683 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1684 case '.':
1685 if (!input.advance()) {
1686 if constexpr(last_) {
1687 remaining = false;
1688 goto final_processing;
1689 }
1690 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1691 }
1692 goto decimal_processing;
1693 case 'e': case 'E':
1694 if (!input.advance()) {
1695 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1696 }
1697 goto exponent_processing;
1698 default:
1699 throw std::runtime_error("unrecognized character in real number on line " + std::to_string(overall_line_count + 1));
1700 }
1701
1702 if (!(input.advance())) {
1703 if constexpr(last_) {
1704 remaining = input.advance();
1705 goto final_processing;
1706 }
1707 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1708 }
1709 }
1710
1711 // Processing the decimal component.
1712decimal_processing:
1713 {
1714 Type_ multiplier = 1;
1715 while (1) {
1716 char val = input.get();
1717 switch(val) {
1718 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1719 multiplier *= 10;
1720 value += (val - '0') / multiplier;
1721 found = true;
1722 break;
1723 case ' ': case '\t': case '\r':
1724 if (!advance_and_chomp(input)) {
1725 if constexpr(last_) {
1726 remaining = false;
1727 goto final_processing;
1728 }
1729 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1730 }
1731 if constexpr(last_) {
1732 if (input.get() != '\n') {
1733 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1734 }
1735 remaining = input.advance(); // advance past the newline
1736 }
1737 goto final_processing;
1738 case '\n':
1739 if constexpr(last_) {
1740 remaining = input.advance(); // advance past the newline
1741 goto final_processing;
1742 }
1743 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1744 case 'e': case 'E':
1745 if (!input.advance()) {
1746 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1747 }
1748 goto exponent_processing;
1749 default:
1750 throw std::runtime_error("unrecognized character in real number on line " + std::to_string(overall_line_count + 1));
1751 }
1752
1753 if (!(input.advance())) {
1754 if constexpr(last_) {
1755 remaining = input.advance();
1756 goto final_processing;
1757 }
1758 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1759 }
1760 }
1761 }
1762
1763 // Processing the exponent.
1764exponent_processing:
1765 {
1766 bool expnegative = (input.get() == '-');
1767 if (expnegative || input.get() == '+') {
1768 if (!(input.advance())) {
1769 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1770 }
1771 }
1772
1773 Type_ exponent = 0;
1774 bool expfound = false;
1775 while (1) {
1776 char val = input.get();
1777 switch(val) {
1778 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1779 exponent *= 10;
1780 exponent += (val - '0');
1781 expfound = true;
1782 break;
1783 case ' ': case '\t': case '\r':
1784 if (!advance_and_chomp(input)) {
1785 if constexpr(last_) {
1786 remaining = false;
1787 goto exponent_processing_finish;
1788 }
1789 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1790 }
1791 if constexpr(last_) {
1792 if (input.get() != '\n') {
1793 throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
1794 }
1795 remaining = input.advance(); // advance past the newline
1796 }
1797 goto exponent_processing_finish;
1798 case '\n':
1799 if constexpr(last_) {
1800 remaining = input.advance(); // advance past the newline
1801 goto exponent_processing_finish;
1802 }
1803 throw std::runtime_error("unexpected newline on line " + std::to_string(overall_line_count + 1));
1804 default:
1805 throw std::runtime_error("unrecognized character in real number on line " + std::to_string(overall_line_count + 1));
1806 }
1807
1808 if (!(input.advance())) {
1809 if constexpr(last_) {
1810 remaining = input.advance();
1811 goto exponent_processing_finish;
1812 }
1813 throw std::runtime_error("unexpected end of file on line " + std::to_string(overall_line_count + 1));
1814 }
1815 }
1816
1817exponent_processing_finish:
1818 if (!expfound) {
1819 throw std::runtime_error("no digits in the decimal exponent on line " + std::to_string(overall_line_count + 1));
1820 }
1821 if (expnegative) {
1822 exponent *= -1;
1823 }
1824 value *= std::pow(10.0, exponent);
1825 }
1826
1827final_processing:
1828 if (!found) {
1829 throw std::runtime_error("no digits in real number on line " + std::to_string(overall_line_count + 1));
1830 }
1831 if (negative) {
1832 value *= -1;
1833 }
1834
1835 if constexpr(last_) {
1836 ParseInfo<Type_> output;
1837 output.value = value;
1838 output.remaining = remaining;
1839 return output;
1840 } else {
1841 return value;
1842 }
1843 }
1844
1845 template<typename Type_>
1846 class RealFieldParser {
1847 public:
1848 template<class Input2_>
1849 ParseInfo<Type_> operator()(Input2_& input, Index overall_line_count) {
1850 return parse_real<true, Type_>(input, overall_line_count);
1851 }
1852 };
1853
1854public:
1868 template<typename Type_ = double, class Store_>
1869 bool scan_real(Store_&& store) {
1870 check_preamble();
1871
1872 auto store_real = [&](Index r, Index c, Type_ val) -> bool {
1873 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, Type_>::type, bool>::value) {
1874 return store(r, c, val);
1875 } else {
1876 store(r, c, val);
1877 return true;
1878 }
1879 };
1880
1881 if (my_details.format == Format::COORDINATE) {
1882 if (my_details.object == Object::MATRIX) {
1883 return scan_matrix_coordinate_non_pattern<Type_, RealFieldParser<Type_> >(std::move(store_real));
1884 } else {
1885 return scan_vector_coordinate_non_pattern<Type_, RealFieldParser<Type_> >(std::move(store_real));
1886 }
1887 } else {
1888 if (my_details.object == Object::MATRIX) {
1889 return scan_matrix_array<Type_, RealFieldParser<Type_> >(std::move(store_real));
1890 } else {
1891 return scan_vector_array<Type_, RealFieldParser<Type_> >(std::move(store_real));
1892 }
1893 }
1894 }
1895
1910 template<typename Type_ = double, class Store_>
1911 bool scan_double(Store_ store) {
1912 return scan_real<Type_, Store_>(std::move(store));
1913 }
1914
1915private:
1916 template<typename InnerType_>
1917 class ComplexFieldParser {
1918 public:
1919 template<typename Input2_>
1920 ParseInfo<std::complex<InnerType_> > operator()(Input2_& input, Index overall_line_count) {
1921 auto first = parse_real<false, InnerType_>(input, overall_line_count);
1922 auto second = parse_real<true, InnerType_>(input, overall_line_count);
1923 ParseInfo<std::complex<InnerType_> > output;
1924 output.value.real(first);
1925 output.value.imag(second.value);
1926 output.remaining = second.remaining;
1927 return output;
1928 }
1929 };
1930
1931public:
1945 template<typename Type_ = double, class Store_>
1946 bool scan_complex(Store_ store) {
1947 check_preamble();
1948
1949 typedef std::complex<Type_> FullType;
1950 auto store_comp = [&](Index r, Index c, FullType val) -> bool {
1951 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, FullType>::type, bool>::value) {
1952 return store(r, c, val);
1953 } else {
1954 store(r, c, val);
1955 return true;
1956 }
1957 };
1958
1959 if (my_details.format == Format::COORDINATE) {
1960 if (my_details.object == Object::MATRIX) {
1961 return scan_matrix_coordinate_non_pattern<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1962 } else {
1963 return scan_vector_coordinate_non_pattern<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1964 }
1965 } else {
1966 if (my_details.object == Object::MATRIX) {
1967 return scan_matrix_array<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1968 } else {
1969 return scan_vector_array<FullType, ComplexFieldParser<Type_> >(std::move(store_comp));
1970 }
1971 }
1972 }
1973
1989 template<typename Type_ = bool, class Store_>
1990 bool scan_pattern(Store_ store) {
1991 check_preamble();
1992 if (my_details.format != Format::COORDINATE) {
1993 throw std::runtime_error("'array' format for 'pattern' field is not supported");
1994 }
1995
1996 auto store_pat = [&](Index r, Index c) -> bool {
1997 if constexpr(std::is_same<typename std::invoke_result<Store_, Index, Index, bool>::type, bool>::value) {
1998 return store(r, c, true);
1999 } else {
2000 store(r, c, true);
2001 return true;
2002 }
2003 };
2004
2005 if (my_details.object == Object::MATRIX) {
2006 return scan_matrix_coordinate_pattern(std::move(store_pat));
2007 } else {
2008 return scan_vector_coordinate_pattern(std::move(store_pat));
2009 }
2010 }
2011};
2012
2013}
2014
2015#endif
Parse a matrix from a Matrix Market file.
Definition Parser.hpp:268
Index get_ncols() const
Definition Parser.hpp:760
bool scan_integer(Store_ store)
Definition Parser.hpp:1497
const MatrixDetails & get_banner() const
Definition Parser.hpp:572
bool scan_pattern(Store_ store)
Definition Parser.hpp:1990
bool scan_complex(Store_ store)
Definition Parser.hpp:1946
bool scan_real(Store_ &&store)
Definition Parser.hpp:1869
bool scan_double(Store_ store)
Definition Parser.hpp:1911
void scan_preamble()
Definition Parser.hpp:786
Parser(std::unique_ptr< Input_ > input, const ParserOptions &options)
Definition Parser.hpp:274
Index get_nrows() const
Definition Parser.hpp:746
Index get_nlines() const
Definition Parser.hpp:774
Classes and methods for parsing Matrix Market files.
unsigned long long Index
Definition Parser.hpp:32
Details extracted from the Matrix Market banner.
Definition utils.hpp:52
Symmetry symmetry
Definition utils.hpp:71
Format format
Definition utils.hpp:61
Object object
Definition utils.hpp:56
Field field
Definition utils.hpp:66
Options for the Parser constructor.
Definition Parser.hpp:37
std::size_t block_size
Definition Parser.hpp:48
int num_threads
Definition Parser.hpp:41
Utilities for matrix parsing.