From a5d1689bc7e05983a44cef5310ae7c50e10fefce Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 10 Apr 2015 03:46:13 +0200 Subject: Implement SLR parser generation At the moment, the generated parser is very inefficient, but that will change soon. --- src/CMakeLists.txt | 1 + src/output_slr.cpp | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/output_slr.hpp | 56 ++++++++++++++++++ src/solar.cpp | 8 +-- 4 files changed, 225 insertions(+), 4 deletions(-) create mode 100644 src/output_slr.cpp create mode 100644 src/output_slr.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 725d0f0..3f2be49 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,6 +5,7 @@ add_executable(solar lex.cpp output.cpp output_lr0.cpp + output_slr.cpp parser.cpp parser_state.cpp solar.cpp diff --git a/src/output_slr.cpp b/src/output_slr.cpp new file mode 100644 index 0000000..ad0d415 --- /dev/null +++ b/src/output_slr.cpp @@ -0,0 +1,164 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "output_slr.hpp" + +#include +#include +#include + + +namespace solar { + +void output_slr_t::emit_state_shift(unsigned i, const symbol_t &token) { + auto it = generator->get_shifts().find(std::make_pair(i, token)); + if (it == generator->get_shifts().end()) + return; + + if (token.get_type() == SYMBOL_TYPE_CHAR) + std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]); + else if (!token.get_value().empty()) + std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str()); + else + std::fprintf(source_file, "\t\t\tcase 0:\n"); + + std::fprintf(source_file, "\t\t\t\tparser->stack[parser->top].value.token = *value;\n"); + std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second)); + std::fprintf(source_file, "\t\t\t\treturn 1;\n\n"); +} + +void output_slr_t::emit_state_reduce(const item_t &item, const symbol_t &token, int rule_id) { + if (token.get_type() == SYMBOL_TYPE_CHAR) + std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]); + else if (!token.get_value().empty()) + std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str()); + else + std::fprintf(source_file, "\t\t\tcase 0:\n"); + + const auto &rhs = item.get_rhs(); + if (rhs.size()) + std::fprintf(source_file, "\t\t\t\tparser->top -= %u;\n", unsigned(rhs.size())); + + if (rule_id >= 0) { + const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs()); + + std::fprintf(source_file, "\t\t\t\t"); + if (!type.empty()) + std::fprintf(source_file, "parser->stack[parser->top].value.symbol_%s = ", item.get_lhs().c_str()); + std::fprintf(source_file, "%sreduce_%i(", prefix(), rule_id); + + bool empty = true; + const auto &vars = generator->get_grammar().rules[rule_id].variables; + for (unsigned i = 0; i < vars.size(); i++) { + const std::string &var = vars[i]; + if (var.empty()) + continue; + + if (!empty) + std::fprintf(source_file, ", "); + + std::fprintf(source_file, "parser->stack[parser->top + %u].value.%s", i, symbol_values[rhs[i]].c_str()); + empty = false; + } + + for (const auto &arg : generator->get_grammar().extra_args) { + if (!empty) + std::fprintf(source_file, ", "); + + std::fprintf(source_file, "%s", arg.second.c_str()); + + empty = false; + } + + std::fprintf(source_file, ");\n"); + } + + std::vector> gotos; + + for (size_t i = 0; i < generator->get_state_count(); i++) { + auto it = generator->get_gotos().find(std::make_pair(i, item.get_lhs())); + if (it == generator->get_gotos().end()) + continue; + + gotos.emplace_back(i, it->second); + } + + if (gotos.size() == 1) { + std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", gotos[0].second); + } + else { + std::fprintf(source_file, "\t\t\t\tswitch (parser->stack[parser->top].state) {\n"); + + for (size_t i = 0; i < generator->get_state_count(); i++) { + auto it = generator->get_gotos().find(std::make_pair(i, item.get_lhs())); + if (it == generator->get_gotos().end()) + continue; + + std::fprintf(source_file, "\t\t\t\tcase %u:\n", unsigned(i)); + std::fprintf(source_file, "\t\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second)); + std::fprintf(source_file, "\t\t\t\t\tbreak;\n"); + } + + + std::fprintf(source_file, "\t\t\t\t}\n"); + } + + std::fprintf(source_file, "\t\t\t\tbreak;\n\n"); +} + +void output_slr_t::do_emit_state(unsigned i, const symbol_t &token) { + auto it = generator->get_reductions().find(std::make_pair(i, token)); + if (it == generator->get_reductions().end()) { + emit_state_shift(i, token); + } + else { + const rule_t &rule = generator->get_grammar().rules[it->second]; + emit_state_reduce(rule.item, token, rule.action.empty() ? -1 : it->second); + } +} + +void output_slr_t::emit_state(unsigned i) { + std::fprintf(source_file, "\t\tcase %u:\n", i); + std::fprintf(source_file, "\t\t\tswitch (token) {\n"); + + if (generator->get_shifts().find(std::make_pair(i, symbol_t::make_nonterm(""))) != generator->get_shifts().end()) { + std::fprintf(source_file, "\t\t\tcase 0:\n"); + std::fprintf(source_file, "\t\t\t\treturn 0;\n\n"); + } + + do_emit_state(i, symbol_t::empty()); + + for (const symbol_t &token : generator->get_terminals()) + do_emit_state(i, token); + + std::fprintf(source_file, "\t\t\tdefault:\n"); + std::fprintf(source_file, "\t\t\t\treturn -1;\n"); + + std::fprintf(source_file, "\t\t\t}\n"); + std::fprintf(source_file, "\t\t\tbreak;\n\n"); +} + +} diff --git a/src/output_slr.hpp b/src/output_slr.hpp new file mode 100644 index 0000000..3c6690a --- /dev/null +++ b/src/output_slr.hpp @@ -0,0 +1,56 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include "generator_slr.hpp" +#include "output.hpp" + + +namespace solar { + +class output_slr_t : public output_t { +private: + const generator_slr_t *generator; + + void emit_state_shift(unsigned i, const symbol_t &token); + void emit_state_reduce(const item_t &item, const symbol_t &token, int rule_id); + void do_emit_state(unsigned i, const symbol_t &token); + +protected: + virtual const generator_t * get_generator() { + return generator; + } + + virtual void emit_state(unsigned i); + +public: + output_slr_t(const generator_slr_t *generator0, const char *header, const char *source) : output_t(header, source), generator(generator0) { + initialize(); + } +}; + +} diff --git a/src/solar.cpp b/src/solar.cpp index e6c0b0a..e7eb00b 100644 --- a/src/solar.cpp +++ b/src/solar.cpp @@ -26,8 +26,8 @@ #include "lex.hpp" #include "parser.hpp" -#include "generator_lr0.hpp" -#include "output_lr0.hpp" +#include "generator_slr.hpp" +#include "output_slr.hpp" #include @@ -87,9 +87,9 @@ int main(int argc, char *argv[]) { if (!read_grammar(argv[1], &state)) return 1; - generator_lr0_t generator(state.get_grammar()); + generator_slr_t generator(state.get_grammar()); - output_lr0_t output(&generator, argv[3], argv[2]); + output_slr_t output(&generator, argv[3], argv[2]); output.write(); return 0; -- cgit v1.2.3