Introduce rule_t and grammar_t

This commit is contained in:
Matthias Schiffer 2015-04-09 04:46:39 +02:00
parent ffe1e7bb96
commit 7a65ee0c61
9 changed files with 169 additions and 141 deletions

View file

@ -36,7 +36,7 @@ std::set<item_t> generator_t::get_set(const std::string &nonterm) {
auto entries = nonterms.equal_range(nonterm); auto entries = nonterms.equal_range(nonterm);
for (auto entry = entries.first; entry != entries.second; ++entry) for (auto entry = entries.first; entry != entries.second; ++entry)
set.insert(std::get<0>(rules[entry->second])); set.insert(grammar.rules[entry->second].item);
return set; return set;
} }
@ -117,15 +117,10 @@ void generator_t::generate_itemsets() {
} }
} }
generator_t::generator_t(const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> &rules0, generator_t::generator_t(const grammar_t &grammar0)
const std::map<std::string, std::string> &nonterm_types0, : grammar(grammar0) {
const std::map<symbol_t, std::pair<std::string, std::string>> &term_types0, for (size_t i = 0; i < grammar.rules.size(); i++) {
const std::string &header_block0, const std::string &source_block0, item_t rule = grammar.rules[i].item;
const std::vector<std::pair<std::string, std::string>> &extra_args0)
: rules(rules0), nonterm_types(nonterm_types0), term_types(term_types0),
header_block(header_block0), source_block(source_block0), extra_args(extra_args0) {
for (size_t i = 0; i < rules.size(); i++) {
item_t rule = std::get<0>(rules[i]);
nonterminals.insert(rule.get_lhs()); nonterminals.insert(rule.get_lhs());
nonterms.insert(std::make_pair(rule.get_lhs(), i)); nonterms.insert(std::make_pair(rule.get_lhs(), i));

View file

@ -26,7 +26,7 @@
#pragma once #pragma once
#include "item.hpp" #include "grammar.hpp"
#include <stdexcept> #include <stdexcept>
#include <set> #include <set>
@ -44,7 +44,8 @@ public:
}; };
private: private:
std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> rules; grammar_t grammar;
std::map<item_t, size_t> rule_ids; std::map<item_t, size_t> rule_ids;
std::multimap<std::string, size_t> nonterms; std::multimap<std::string, size_t> nonterms;
@ -60,14 +61,6 @@ private:
std::set<size_t> shift_conflicts; std::set<size_t> shift_conflicts;
std::map<std::string, std::string> nonterm_types;
std::map<symbol_t, std::pair<std::string, std::string>> term_types;
std::string header_block;
std::string source_block;
std::vector<std::pair<std::string, std::string>> extra_args;
void close_set(std::set<item_t> *set); void close_set(std::set<item_t> *set);
std::set<item_t> get_set(const std::string &nonterm); std::set<item_t> get_set(const std::string &nonterm);
@ -100,34 +93,8 @@ private:
void generate_itemsets(); void generate_itemsets();
public: public:
const std::string & get_nonterm_type(const std::string &sym) const { const grammar_t & get_grammar() const {
static const std::string empty; return grammar;
auto it = nonterm_types.find(sym);
if (it == nonterm_types.end())
return empty;
else
return it->second;
}
const std::pair<std::string, std::string> & get_term_type(const symbol_t &sym) const {
static const std::pair<std::string, std::string> empty;
auto it = term_types.find(sym);
if (it == term_types.end())
return empty;
else
return it->second;
}
const std::string & get_type(const symbol_t &sym) const {
switch (sym.get_type()) {
case SYMBOL_TYPE_NONTERM:
return get_nonterm_type(sym.get_value());
default:
return get_term_type(sym).first;
}
} }
const std::set<std::string> & get_nonterminals() const { const std::set<std::string> & get_nonterminals() const {
@ -142,10 +109,6 @@ public:
return itemsets.size(); return itemsets.size();
} }
const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> & get_rules() const {
return rules;
}
const std::map<size_t, size_t> & get_reductions() const { const std::map<size_t, size_t> & get_reductions() const {
return reductions; return reductions;
} }
@ -158,24 +121,8 @@ public:
return gotos; return gotos;
} }
const std::string & get_header_block() const {
return header_block;
}
const std::string & get_source_block() const { generator_t(const grammar_t &grammar0);
return source_block;
}
const std::vector<std::pair<std::string, std::string>> & get_extra_args() const {
return extra_args;
}
generator_t(const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> &rules0,
const std::map<std::string, std::string> &nonterm_types0,
const std::map<symbol_t, std::pair<std::string, std::string>> &term_types0,
const std::string &header_block0, const std::string &source_block0,
const std::vector<std::pair<std::string, std::string>> &extra_args0);
}; };
} }

79
src/grammar.hpp Normal file
View file

@ -0,0 +1,79 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "rule.hpp"
#include <map>
namespace solar {
/**
 * Bundles everything that describes one grammar: its rules, the type
 * information recorded for its symbols, and the verbatim blocks and extra
 * arguments that are passed through to the generated output.
 */
struct grammar_t {
	/** All rules of the grammar; rules are referred to by their index in this vector. */
	std::vector<rule_t> rules;

	/** Type string recorded for a nonterminal, keyed by the nonterminal's name. */
	std::map<std::string, std::string> nonterm_types;
	/** (type, name) pair recorded for a terminal symbol. */
	std::map<symbol_t, std::pair<std::string, std::string>> term_types;

	/** Verbatim text emitted into the generated header file (may be empty). */
	std::string header_block;
	/** Verbatim text emitted into the generated source file (may be empty). */
	std::string source_block;

	/** Additional (type, name) parameters appended to the generated function signatures. */
	std::vector<std::pair<std::string, std::string>> extra_args;

	/**
	 * Looks up the type recorded for the nonterminal named `sym`.
	 *
	 * Returns a reference to an empty string when no type has been recorded.
	 */
	const std::string & get_nonterm_type(const std::string &sym) const {
		static const std::string empty;

		const auto found = nonterm_types.find(sym);
		return (found != nonterm_types.end()) ? found->second : empty;
	}

	/**
	 * Looks up the (type, name) pair recorded for the terminal `sym`.
	 *
	 * Returns a reference to a pair of empty strings when nothing has been recorded.
	 */
	const std::pair<std::string, std::string> & get_term_type(const symbol_t &sym) const {
		static const std::pair<std::string, std::string> empty;

		const auto found = term_types.find(sym);
		return (found != term_types.end()) ? found->second : empty;
	}

	/**
	 * Returns the type string of an arbitrary symbol: nonterminals are looked
	 * up by their value in nonterm_types, every other kind of symbol is looked
	 * up in term_types (using the type half of the stored pair).
	 */
	const std::string & get_type(const symbol_t &sym) const {
		if (sym.get_type() == SYMBOL_TYPE_NONTERM)
			return get_nonterm_type(sym.get_value());

		return get_term_type(sym).first;
	}
};
}

View file

@ -34,8 +34,8 @@
namespace solar { namespace solar {
struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> { struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
item_t(const std::string &lhs) item_t(const std::string &lhs, const std::vector<symbol_t> &rhs = std::vector<symbol_t>())
: std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {} : std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, rhs, 0) {}
const std::string & get_lhs() const { const std::string & get_lhs() const {
return std::get<0>(*this); return std::get<0>(*this);

View file

@ -54,7 +54,7 @@ output_t::output_t(const generator_t *generator0, const char *header, const char
if (term.get_type() == SYMBOL_TYPE_TERM) if (term.get_type() == SYMBOL_TYPE_TERM)
tokens.insert(std::make_pair(term.get_value(), tokens.size())); tokens.insert(std::make_pair(term.get_value(), tokens.size()));
symbol_values.insert(std::make_pair(term, "token." + generator->get_term_type(term).second)); symbol_values.insert(std::make_pair(term, "token." + generator->get_grammar().get_term_type(term).second));
} }
} }
@ -80,8 +80,8 @@ void output_t::emit_token_value() {
std::map<std::string, std::string> token_values; std::map<std::string, std::string> token_values;
for (const auto &term : generator->get_terminals()) { for (const symbol_t &term : generator->get_terminals()) {
const auto &type = generator->get_term_type(term); const auto &type = generator->get_grammar().get_term_type(term);
if (!type.first.empty()) if (!type.first.empty())
token_values.insert(std::make_pair(type.second, type.first)); token_values.insert(std::make_pair(type.second, type.first));
} }
@ -95,8 +95,8 @@ void output_t::emit_token_value() {
void output_t::emit_header() { void output_t::emit_header() {
std::fprintf(header_file, "#pragma once\n\n"); std::fprintf(header_file, "#pragma once\n\n");
if (!generator->get_header_block().empty()) if (!generator->get_grammar().header_block.empty())
std::fprintf(header_file, "%s\n", generator->get_header_block().c_str()); std::fprintf(header_file, "%s\n", generator->get_grammar().header_block.c_str());
emit_tokens(); emit_tokens();
emit_token_value(); emit_token_value();
@ -108,19 +108,19 @@ void output_t::emit_header() {
std::fprintf(header_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix()); std::fprintf(header_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix());
for (const auto &arg : generator->get_extra_args()) for (const auto &arg : generator->get_grammar().extra_args)
std::fprintf(header_file, ", %s %s", arg.first.c_str(), arg.second.c_str()); std::fprintf(header_file, ", %s %s", arg.first.c_str(), arg.second.c_str());
std::fprintf(header_file, ");\n"); std::fprintf(header_file, ");\n");
} }
void output_t::emit_reduction(unsigned rule_id) { void output_t::emit_reduction(unsigned rule_id) {
const auto &rule = generator->get_rules()[rule_id]; const rule_t &rule = generator->get_grammar().rules[rule_id];
std::fprintf(source_file, "static inline "); std::fprintf(source_file, "static inline ");
const item_t &item = std::get<0>(rule); const item_t &item = rule.item;
const std::string &type = generator->get_nonterm_type(item.get_lhs()); const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs());
if (type.empty()) if (type.empty())
std::fprintf(source_file, "void"); std::fprintf(source_file, "void");
else else
@ -129,8 +129,8 @@ void output_t::emit_reduction(unsigned rule_id) {
std::fprintf(source_file, " %sreduce_%u(", prefix(), rule_id); std::fprintf(source_file, " %sreduce_%u(", prefix(), rule_id);
bool empty = true; bool empty = true;
for (unsigned i = 0; i < std::get<1>(rule).size(); i++) { for (unsigned i = 0; i < rule.variables.size(); i++) {
const std::string &var = std::get<1>(rule)[i]; const std::string &var = rule.variables[i];
if (var.empty()) if (var.empty())
continue; continue;
@ -138,12 +138,12 @@ void output_t::emit_reduction(unsigned rule_id) {
if (!empty) if (!empty)
std::fprintf(source_file, ", "); std::fprintf(source_file, ", ");
std::fprintf(source_file, "%s %s", generator->get_type(item.get_rhs()[i]).c_str(), var.c_str()); std::fprintf(source_file, "%s %s", generator->get_grammar().get_type(item.get_rhs()[i]).c_str(), var.c_str());
empty = false; empty = false;
} }
for (const auto &arg : generator->get_extra_args()) { for (const auto &arg : generator->get_grammar().extra_args) {
if (!empty) if (!empty)
std::fprintf(source_file, ", "); std::fprintf(source_file, ", ");
@ -156,16 +156,16 @@ void output_t::emit_reduction(unsigned rule_id) {
std::fprintf(source_file, "void"); std::fprintf(source_file, "void");
std::fprintf(source_file, ") {"); std::fprintf(source_file, ") {");
std::fprintf(source_file, "%s", std::get<2>(rule).c_str()); std::fprintf(source_file, "%s", rule.action.c_str());
std::fprintf(source_file, "}\n\n"); std::fprintf(source_file, "}\n\n");
} }
void output_t::emit_reductions() { void output_t::emit_reductions() {
const auto &rules = generator->get_rules(); const auto &rules = generator->get_grammar().rules;
for (size_t i = 0; i < rules.size(); i++) { for (size_t i = 0; i < rules.size(); i++) {
if (!std::get<2>(rules[i]).empty()) if (!rules[i].action.empty())
emit_reduction(i); emit_reduction(i);
} }
} }
@ -205,7 +205,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) {
std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(rhs.size())); std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(rhs.size()));
if (rule_id >= 0) { if (rule_id >= 0) {
const std::string &type = generator->get_nonterm_type(item.get_lhs()); const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs());
std::fprintf(source_file, "\t\t\t"); std::fprintf(source_file, "\t\t\t");
if (!type.empty()) if (!type.empty())
@ -213,7 +213,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) {
std::fprintf(source_file, "%sreduce_%i(", prefix(), rule_id); std::fprintf(source_file, "%sreduce_%i(", prefix(), rule_id);
bool empty = true; bool empty = true;
const auto &vars = std::get<1>(generator->get_rules()[rule_id]); const auto &vars = generator->get_grammar().rules[rule_id].variables;
for (unsigned i = 0; i < vars.size(); i++) { for (unsigned i = 0; i < vars.size(); i++) {
const std::string &var = vars[i]; const std::string &var = vars[i];
if (var.empty()) if (var.empty())
@ -226,7 +226,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) {
empty = false; empty = false;
} }
for (const auto &arg : generator->get_extra_args()) { for (const auto &arg : generator->get_grammar().extra_args) {
if (!empty) if (!empty)
std::fprintf(source_file, ", "); std::fprintf(source_file, ", ");
@ -277,8 +277,8 @@ void output_t::emit_state(unsigned i) {
emit_state_shift(i); emit_state_shift(i);
} }
else { else {
const auto &rule = generator->get_rules()[it->second]; const rule_t &rule = generator->get_grammar().rules[it->second];
emit_state_reduce(std::get<0>(rule), std::get<2>(rule).empty() ? -1 : it->second); emit_state_reduce(rule.item, rule.action.empty() ? -1 : it->second);
} }
std::fprintf(source_file, "\t\t\tbreak;\n\n"); std::fprintf(source_file, "\t\t\tbreak;\n\n");
@ -305,14 +305,14 @@ void output_t::emit_header_include() {
void output_t::emit_source() { void output_t::emit_source() {
emit_header_include(); emit_header_include();
if (!generator->get_source_block().empty()) if (!generator->get_grammar().source_block.empty())
std::fprintf(source_file, "%s\n\n", generator->get_source_block().c_str()); std::fprintf(source_file, "%s\n\n", generator->get_grammar().source_block.c_str());
std::fprintf(source_file, "typedef union %ssymbol_value {\n", prefix()); std::fprintf(source_file, "typedef union %ssymbol_value {\n", prefix());
std::fprintf(source_file, "\t%stoken_value_t token;\n", prefix()); std::fprintf(source_file, "\t%stoken_value_t token;\n", prefix());
for (const auto &nonterm : generator->get_nonterminals()) { for (const auto &nonterm : generator->get_nonterminals()) {
const std::string &type = generator->get_nonterm_type(nonterm); const std::string &type = generator->get_grammar().get_nonterm_type(nonterm);
if (!type.empty()) if (!type.empty())
std::fprintf(source_file, "\t%s symbol_%s;\n", type.c_str(), nonterm.c_str()); std::fprintf(source_file, "\t%s symbol_%s;\n", type.c_str(), nonterm.c_str());
@ -344,7 +344,7 @@ void output_t::emit_source() {
emit_reductions(); emit_reductions();
std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix()); std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix());
for (const auto &arg : generator->get_extra_args()) for (const auto &arg : generator->get_grammar().extra_args)
std::fprintf(source_file, ", %s %s", arg.first.c_str(), arg.second.c_str()); std::fprintf(source_file, ", %s %s", arg.first.c_str(), arg.second.c_str());
std::fprintf(source_file, ") {\n"); std::fprintf(source_file, ") {\n");

View file

@ -30,7 +30,7 @@
namespace solar { namespace solar {
void parser_state_t::new_rule(const char *nonterm) { void parser_state_t::new_rule(const char *nonterm) {
if (rules.empty()) { if (grammar.rules.empty()) {
// start rule // start rule
add_rule_nonterminal(nonterm); add_rule_nonterminal(nonterm);
add_rule(); add_rule();
@ -56,7 +56,7 @@ void parser_state_t::add_rule_terminal(unsigned char term) {
} }
void parser_state_t::add_rule(const std::string &action) { void parser_state_t::add_rule(const std::string &action) {
rules.emplace_back(std::move(current), std::move(current_vars), action); grammar.rules.emplace_back(rule_t{std::move(current), std::move(current_vars), action});
} }
void parser_state_t::add_rule_var(const char *var) { void parser_state_t::add_rule_var(const char *var) {
@ -72,7 +72,7 @@ void parser_state_t::add_type_terminal(const char *term) {
} }
void parser_state_t::set_type_nonterminal(const char *type) { void parser_state_t::set_type_nonterminal(const char *type) {
nonterm_types.insert(std::make_pair(current_var, type)); grammar.nonterm_types.insert(std::make_pair(current_var, type));
} }
void parser_state_t::set_type_terminal(const char *type) { void parser_state_t::set_type_terminal(const char *type) {
@ -80,7 +80,7 @@ void parser_state_t::set_type_terminal(const char *type) {
} }
void parser_state_t::set_type_terminal_name(const char *name) { void parser_state_t::set_type_terminal_name(const char *name) {
term_types.insert(std::make_pair(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name))); grammar.term_types.insert(std::make_pair(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name)));
} }
} }

View file

@ -26,63 +26,35 @@
#pragma once #pragma once
#include "item.hpp" #include "grammar.hpp"
#include <map>
namespace solar { namespace solar {
class parser_state_t { class parser_state_t {
private: private:
std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> rules; grammar_t grammar;
std::map<std::string, std::string> nonterm_types;
std::map<symbol_t, std::pair<std::string, std::string>> term_types;
item_t current; item_t current;
std::vector<std::string> current_vars; std::vector<std::string> current_vars;
std::string current_var; std::string current_var;
std::string current_type; std::string current_type;
std::string header_block;
std::string source_block;
std::string current_extra_arg; std::string current_extra_arg;
std::vector<std::pair<std::string, std::string>> extra_args;
public: public:
parser_state_t() : current("") {} parser_state_t() : current("") {}
const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> & get_rules() const { const grammar_t & get_grammar() const {
return rules; return grammar;
}
const std::map<std::string, std::string> & get_nonterm_types() const {
return nonterm_types;
}
const std::map<symbol_t, std::pair<std::string, std::string>> & get_term_types() const {
return term_types;
}
const std::string & get_header_block() const {
return header_block;
} }
void set_header_block(const char *value) { void set_header_block(const char *value) {
header_block = value; grammar.header_block = value;
}
const std::string & get_source_block() const {
return source_block;
} }
void set_source_block(const char *value) { void set_source_block(const char *value) {
source_block = value; grammar.source_block = value;
}
const std::vector<std::pair<std::string, std::string>> & get_extra_args() const {
return extra_args;
} }
void add_extra_arg(const char *type) { void add_extra_arg(const char *type) {
@ -90,7 +62,7 @@ public:
} }
void set_extra_arg_name(const char *name) { void set_extra_arg_name(const char *name) {
extra_args.emplace_back(current_extra_arg, name); grammar.extra_args.emplace_back(current_extra_arg, name);
} }

40
src/rule.hpp Normal file
View file

@ -0,0 +1,40 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "item.hpp"
namespace solar {
/**
 * A single grammar rule together with the data needed to emit its reduction.
 *
 * This is a plain aggregate; it is brace-initialised in field order
 * (item, variables, action), so the order of the members must not change.
 */
struct rule_t {
/** The production itself (left-hand side and right-hand side symbols). */
item_t item;
/**
 * Variable names for the right-hand side symbols, indexed in parallel with
 * item.get_rhs(); an empty string means the symbol at that position is not
 * passed to the reduction.
 */
std::vector<std::string> variables;
/** Body text of the reduction function; an empty action means no reduction function is emitted for the rule. */
std::string action;
};
}

View file

@ -87,12 +87,7 @@ int main(int argc, char *argv[]) {
if (!read_grammar(argv[1], &state)) if (!read_grammar(argv[1], &state))
return 1; return 1;
generator_t generator(state.get_rules(), generator_t generator(state.get_grammar());
state.get_nonterm_types(),
state.get_term_types(),
state.get_header_block(),
state.get_source_block(),
state.get_extra_args());
output_t output(&generator, argv[3], argv[2]); output_t output(&generator, argv[3], argv[2]);
output.write(); output.write();