From 7a65ee0c61b04b09f10655f03be2fc082c8d5b2b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 9 Apr 2015 04:46:39 +0200 Subject: Introduce rule_t and grammar_t --- src/generator.cpp | 15 ++++------ src/generator.hpp | 65 ++++-------------------------------------- src/grammar.hpp | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/item.hpp | 4 +-- src/output.cpp | 50 ++++++++++++++++----------------- src/parser_state.cpp | 8 +++--- src/parser_state.hpp | 42 +++++----------------------- src/rule.hpp | 40 ++++++++++++++++++++++++++ src/solar.cpp | 7 +---- 9 files changed, 169 insertions(+), 141 deletions(-) create mode 100644 src/grammar.hpp create mode 100644 src/rule.hpp diff --git a/src/generator.cpp b/src/generator.cpp index 16f08d0..a0f88d8 100644 --- a/src/generator.cpp +++ b/src/generator.cpp @@ -36,7 +36,7 @@ std::set generator_t::get_set(const std::string &nonterm) { auto entries = nonterms.equal_range(nonterm); for (auto entry = entries.first; entry != entries.second; ++entry) - set.insert(std::get<0>(rules[entry->second])); + set.insert(grammar.rules[entry->second].item); return set; } @@ -117,15 +117,10 @@ void generator_t::generate_itemsets() { } } -generator_t::generator_t(const std::vector, std::string>> &rules0, - const std::map &nonterm_types0, - const std::map> &term_types0, - const std::string &header_block0, const std::string &source_block0, - const std::vector> &extra_args0) - : rules(rules0), nonterm_types(nonterm_types0), term_types(term_types0), - header_block(header_block0), source_block(source_block0), extra_args(extra_args0) { - for (size_t i = 0; i < rules.size(); i++) { - item_t rule = std::get<0>(rules[i]); +generator_t::generator_t(const grammar_t &grammar0) + : grammar(grammar0) { + for (size_t i = 0; i < grammar.rules.size(); i++) { + item_t rule = grammar.rules[i].item; nonterminals.insert(rule.get_lhs()); nonterms.insert(std::make_pair(rule.get_lhs(), i)); diff --git a/src/generator.hpp b/src/generator.hpp index b0bd4c9..576fccb 100644 --- a/src/generator.hpp +++ b/src/generator.hpp @@ -26,7 +26,7 @@ #pragma once -#include "item.hpp" +#include "grammar.hpp" #include #include @@ -44,7 +44,8 @@ public: }; private: - std::vector, std::string>> rules; + grammar_t grammar; + std::map rule_ids; std::multimap nonterms; @@ -60,14 +61,6 @@ private: std::set shift_conflicts; - std::map nonterm_types; - std::map> term_types; - - std::string header_block; - std::string source_block; - - std::vector> extra_args; - void close_set(std::set *set); std::set get_set(const std::string &nonterm); @@ -100,34 +93,8 @@ private: void generate_itemsets(); public: - const std::string & get_nonterm_type(const std::string &sym) const { - static const std::string empty; - - auto it = nonterm_types.find(sym); - if (it == nonterm_types.end()) - return empty; - else - return it->second; - } - - const std::pair & get_term_type(const symbol_t &sym) const { - static const std::pair empty; - - auto it = term_types.find(sym); - if (it == term_types.end()) - return empty; - else - return it->second; - } - - const std::string & get_type(const symbol_t &sym) const { - switch (sym.get_type()) { - case SYMBOL_TYPE_NONTERM: - return get_nonterm_type(sym.get_value()); - - default: - return get_term_type(sym).first; - } + const grammar_t & get_grammar() const { + return grammar; } const std::set & get_nonterminals() const { @@ -142,10 +109,6 @@ public: return itemsets.size(); } - const std::vector, std::string>> & get_rules() const { - return rules; - } - const std::map & get_reductions() const { return reductions; } @@ -158,24 +121,8 @@ public: return gotos; } - const std::string & get_header_block() const { - return header_block; - } - - const std::string & get_source_block() const { - return source_block; - } - - const std::vector> & get_extra_args() const { - return extra_args; - } - - generator_t(const std::vector, std::string>> &rules0, - const std::map &nonterm_types0, - const std::map> &term_types0, - const std::string &header_block0, const std::string &source_block0, - const std::vector> &extra_args0); + generator_t(const grammar_t &grammar0); }; } diff --git a/src/grammar.hpp b/src/grammar.hpp new file mode 100644 index 0000000..e603d00 --- /dev/null +++ b/src/grammar.hpp @@ -0,0 +1,79 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include "rule.hpp" + +#include + + +namespace solar { + +struct grammar_t { + std::vector rules; + + std::map nonterm_types; + std::map> term_types; + + std::string header_block; + std::string source_block; + + std::vector> extra_args; + + + const std::string & get_nonterm_type(const std::string &sym) const { + static const std::string empty; + + auto it = nonterm_types.find(sym); + if (it == nonterm_types.end()) + return empty; + else + return it->second; + } + + const std::pair & get_term_type(const symbol_t &sym) const { + static const std::pair empty; + + auto it = term_types.find(sym); + if (it == term_types.end()) + return empty; + else + return it->second; + } + + const std::string & get_type(const symbol_t &sym) const { + switch (sym.get_type()) { + case SYMBOL_TYPE_NONTERM: + return get_nonterm_type(sym.get_value()); + + default: + return get_term_type(sym).first; + } + } +}; + +} diff --git a/src/item.hpp b/src/item.hpp index ae282a9..59d694e 100644 --- a/src/item.hpp +++ b/src/item.hpp @@ -34,8 +34,8 @@ namespace solar { struct item_t : public std::tuple, unsigned> { - item_t(const std::string &lhs) - : std::tuple, unsigned>(lhs, std::vector(), 0) {} + item_t(const std::string &lhs, const std::vector &rhs = std::vector()) + : std::tuple, unsigned>(lhs, rhs, 0) {} const std::string & get_lhs() const { return std::get<0>(*this); diff --git a/src/output.cpp b/src/output.cpp index cc9d5f3..f15aac0 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -54,7 +54,7 @@ output_t::output_t(const generator_t *generator0, const char *header, const char if (term.get_type() == SYMBOL_TYPE_TERM) tokens.insert(std::make_pair(term.get_value(), tokens.size())); - symbol_values.insert(std::make_pair(term, "token." + generator->get_term_type(term).second)); + symbol_values.insert(std::make_pair(term, "token." + generator->get_grammar().get_term_type(term).second)); } } @@ -80,8 +80,8 @@ void output_t::emit_token_value() { std::map token_values; - for (const auto &term : generator->get_terminals()) { - const auto &type = generator->get_term_type(term); + for (const symbol_t &term : generator->get_terminals()) { + const auto &type = generator->get_grammar().get_term_type(term); if (!type.first.empty()) token_values.insert(std::make_pair(type.second, type.first)); } @@ -95,8 +95,8 @@ void output_t::emit_token_value() { void output_t::emit_header() { std::fprintf(header_file, "#pragma once\n\n"); - if (!generator->get_header_block().empty()) - std::fprintf(header_file, "%s\n", generator->get_header_block().c_str()); + if (!generator->get_grammar().header_block.empty()) + std::fprintf(header_file, "%s\n", generator->get_grammar().header_block.c_str()); emit_tokens(); emit_token_value(); @@ -108,19 +108,19 @@ void output_t::emit_header() { std::fprintf(header_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix()); - for (const auto &arg : generator->get_extra_args()) + for (const auto &arg : generator->get_grammar().extra_args) std::fprintf(header_file, ", %s %s", arg.first.c_str(), arg.second.c_str()); std::fprintf(header_file, ");\n"); } void output_t::emit_reduction(unsigned rule_id) { - const auto &rule = generator->get_rules()[rule_id]; + const rule_t &rule = generator->get_grammar().rules[rule_id]; std::fprintf(source_file, "static inline "); - const item_t &item = std::get<0>(rule); - const std::string &type = generator->get_nonterm_type(item.get_lhs()); + const item_t &item = rule.item; + const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs()); if (type.empty()) std::fprintf(source_file, "void"); else @@ -129,8 +129,8 @@ void output_t::emit_reduction(unsigned rule_id) { std::fprintf(source_file, " %sreduce_%u(", prefix(), rule_id); bool empty = true; - for (unsigned i = 0; i < std::get<1>(rule).size(); i++) { - const std::string &var = std::get<1>(rule)[i]; + for (unsigned i = 0; i < rule.variables.size(); i++) { + const std::string &var = rule.variables[i]; if (var.empty()) continue; @@ -138,12 +138,12 @@ void output_t::emit_reduction(unsigned rule_id) { if (!empty) std::fprintf(source_file, ", "); - std::fprintf(source_file, "%s %s", generator->get_type(item.get_rhs()[i]).c_str(), var.c_str()); + std::fprintf(source_file, "%s %s", generator->get_grammar().get_type(item.get_rhs()[i]).c_str(), var.c_str()); empty = false; } - for (const auto &arg : generator->get_extra_args()) { + for (const auto &arg : generator->get_grammar().extra_args) { if (!empty) std::fprintf(source_file, ", "); @@ -156,16 +156,16 @@ void output_t::emit_reduction(unsigned rule_id) { std::fprintf(source_file, "void"); std::fprintf(source_file, ") {"); - std::fprintf(source_file, "%s", std::get<2>(rule).c_str()); + std::fprintf(source_file, "%s", rule.action.c_str()); std::fprintf(source_file, "}\n\n"); } void output_t::emit_reductions() { - const auto &rules = generator->get_rules(); + const auto &rules = generator->get_grammar().rules; for (size_t i = 0; i < rules.size(); i++) { - if (!std::get<2>(rules[i]).empty()) + if (!rules[i].action.empty()) emit_reduction(i); } } @@ -205,7 +205,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) { std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(rhs.size())); if (rule_id >= 0) { - const std::string &type = generator->get_nonterm_type(item.get_lhs()); + const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs()); std::fprintf(source_file, "\t\t\t"); if (!type.empty()) @@ -213,7 +213,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) { std::fprintf(source_file, "%sreduce_%i(", prefix(), rule_id); bool empty = true; - const auto &vars = std::get<1>(generator->get_rules()[rule_id]); + const auto &vars = generator->get_grammar().rules[rule_id].variables; for (unsigned i = 0; i < vars.size(); i++) { const std::string &var = vars[i]; if (var.empty()) @@ -226,7 +226,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) { empty = false; } - for (const auto &arg : generator->get_extra_args()) { + for (const auto &arg : generator->get_grammar().extra_args) { if (!empty) std::fprintf(source_file, ", "); @@ -277,8 +277,8 @@ void output_t::emit_state(unsigned i) { emit_state_shift(i); } else { - const auto &rule = generator->get_rules()[it->second]; - emit_state_reduce(std::get<0>(rule), std::get<2>(rule).empty() ? -1 : it->second); + const rule_t &rule = generator->get_grammar().rules[it->second]; + emit_state_reduce(rule.item, rule.action.empty() ? -1 : it->second); } std::fprintf(source_file, "\t\t\tbreak;\n\n"); @@ -305,14 +305,14 @@ void output_t::emit_header_include() { void output_t::emit_source() { emit_header_include(); - if (!generator->get_source_block().empty()) - std::fprintf(source_file, "%s\n\n", generator->get_source_block().c_str()); + if (!generator->get_grammar().source_block.empty()) + std::fprintf(source_file, "%s\n\n", generator->get_grammar().source_block.c_str()); std::fprintf(source_file, "typedef union %ssymbol_value {\n", prefix()); std::fprintf(source_file, "\t%stoken_value_t token;\n", prefix()); for (const auto &nonterm : generator->get_nonterminals()) { - const std::string &type = generator->get_nonterm_type(nonterm); + const std::string &type = generator->get_grammar().get_nonterm_type(nonterm); if (!type.empty()) std::fprintf(source_file, "\t%s symbol_%s;\n", type.c_str(), nonterm.c_str()); @@ -344,7 +344,7 @@ void output_t::emit_source() { emit_reductions(); std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix()); - for (const auto &arg : generator->get_extra_args()) + for (const auto &arg : generator->get_grammar().extra_args) std::fprintf(source_file, ", %s %s", arg.first.c_str(), arg.second.c_str()); std::fprintf(source_file, ") {\n"); diff --git a/src/parser_state.cpp b/src/parser_state.cpp index 5ec8d08..f0e9bdd 100644 --- a/src/parser_state.cpp +++ b/src/parser_state.cpp @@ -30,7 +30,7 @@ namespace solar { void parser_state_t::new_rule(const char *nonterm) { - if (rules.empty()) { + if (grammar.rules.empty()) { // start rule add_rule_nonterminal(nonterm); add_rule(); @@ -56,7 +56,7 @@ void parser_state_t::add_rule_terminal(unsigned char term) { } void parser_state_t::add_rule(const std::string &action) { - rules.emplace_back(std::move(current), std::move(current_vars), action); + grammar.rules.emplace_back(rule_t{std::move(current), std::move(current_vars), action}); } void parser_state_t::add_rule_var(const char *var) { @@ -72,7 +72,7 @@ void parser_state_t::add_type_terminal(const char *term) { } void parser_state_t::set_type_nonterminal(const char *type) { - nonterm_types.insert(std::make_pair(current_var, type)); + grammar.nonterm_types.insert(std::make_pair(current_var, type)); } void parser_state_t::set_type_terminal(const char *type) { @@ -80,7 +80,7 @@ void parser_state_t::set_type_terminal(const char *type) { } void parser_state_t::set_type_terminal_name(const char *name) { - term_types.insert(std::make_pair(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name))); + grammar.term_types.insert(std::make_pair(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name))); } } diff --git a/src/parser_state.hpp b/src/parser_state.hpp index 5dcce54..4fd057d 100644 --- a/src/parser_state.hpp +++ b/src/parser_state.hpp @@ -26,63 +26,35 @@ #pragma once -#include "item.hpp" - -#include +#include "grammar.hpp" namespace solar { class parser_state_t { private: - std::vector, std::string>> rules; - std::map nonterm_types; - std::map> term_types; + grammar_t grammar; item_t current; std::vector current_vars; std::string current_var; std::string current_type; - std::string header_block; - std::string source_block; - std::string current_extra_arg; - std::vector> extra_args; public: parser_state_t() : current("") {} - const std::vector, std::string>> & get_rules() const { - return rules; - } - - const std::map & get_nonterm_types() const { - return nonterm_types; - } - - const std::map> & get_term_types() const { - return term_types; - } - - const std::string & get_header_block() const { - return header_block; + const grammar_t & get_grammar() const { + return grammar; } void set_header_block(const char *value) { - header_block = value; - } - - const std::string & get_source_block() const { - return source_block; + grammar.header_block = value; } void set_source_block(const char *value) { - source_block = value; - } - - const std::vector> & get_extra_args() const { - return extra_args; + grammar.source_block = value; } void add_extra_arg(const char *type) { @@ -90,7 +62,7 @@ public: } void set_extra_arg_name(const char *name) { - extra_args.emplace_back(current_extra_arg, name); + grammar.extra_args.emplace_back(current_extra_arg, name); } diff --git a/src/rule.hpp b/src/rule.hpp new file mode 100644 index 0000000..5b85ced --- /dev/null +++ b/src/rule.hpp @@ -0,0 +1,40 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include "item.hpp" + + +namespace solar { + +struct rule_t { + item_t item; + std::vector variables; + std::string action; +}; + +} diff --git a/src/solar.cpp b/src/solar.cpp index 7e211a5..68a1df6 100644 --- a/src/solar.cpp +++ b/src/solar.cpp @@ -87,12 +87,7 @@ int main(int argc, char *argv[]) { if (!read_grammar(argv[1], &state)) return 1; - generator_t generator(state.get_rules(), - state.get_nonterm_types(), - state.get_term_types(), - state.get_header_block(), - state.get_source_block(), - state.get_extra_args()); + generator_t generator(state.get_grammar()); output_t output(&generator, argv[3], argv[2]); output.write(); -- cgit v1.2.3