Introduce rule_t and grammar_t

This commit is contained in:
Matthias Schiffer 2015-04-09 04:46:39 +02:00
parent ffe1e7bb96
commit 7a65ee0c61
9 changed files with 169 additions and 141 deletions

View file

@ -36,7 +36,7 @@ std::set<item_t> generator_t::get_set(const std::string &nonterm) {
auto entries = nonterms.equal_range(nonterm);
for (auto entry = entries.first; entry != entries.second; ++entry)
set.insert(std::get<0>(rules[entry->second]));
set.insert(grammar.rules[entry->second].item);
return set;
}
@ -117,15 +117,10 @@ void generator_t::generate_itemsets() {
}
}
generator_t::generator_t(const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> &rules0,
const std::map<std::string, std::string> &nonterm_types0,
const std::map<symbol_t, std::pair<std::string, std::string>> &term_types0,
const std::string &header_block0, const std::string &source_block0,
const std::vector<std::pair<std::string, std::string>> &extra_args0)
: rules(rules0), nonterm_types(nonterm_types0), term_types(term_types0),
header_block(header_block0), source_block(source_block0), extra_args(extra_args0) {
for (size_t i = 0; i < rules.size(); i++) {
item_t rule = std::get<0>(rules[i]);
generator_t::generator_t(const grammar_t &grammar0)
: grammar(grammar0) {
for (size_t i = 0; i < grammar.rules.size(); i++) {
item_t rule = grammar.rules[i].item;
nonterminals.insert(rule.get_lhs());
nonterms.insert(std::make_pair(rule.get_lhs(), i));

View file

@ -26,7 +26,7 @@
#pragma once
#include "item.hpp"
#include "grammar.hpp"
#include <stdexcept>
#include <set>
@ -44,7 +44,8 @@ public:
};
private:
std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> rules;
grammar_t grammar;
std::map<item_t, size_t> rule_ids;
std::multimap<std::string, size_t> nonterms;
@ -60,14 +61,6 @@ private:
std::set<size_t> shift_conflicts;
std::map<std::string, std::string> nonterm_types;
std::map<symbol_t, std::pair<std::string, std::string>> term_types;
std::string header_block;
std::string source_block;
std::vector<std::pair<std::string, std::string>> extra_args;
void close_set(std::set<item_t> *set);
std::set<item_t> get_set(const std::string &nonterm);
@ -100,34 +93,8 @@ private:
void generate_itemsets();
public:
const std::string & get_nonterm_type(const std::string &sym) const {
static const std::string empty;
auto it = nonterm_types.find(sym);
if (it == nonterm_types.end())
return empty;
else
return it->second;
}
const std::pair<std::string, std::string> & get_term_type(const symbol_t &sym) const {
static const std::pair<std::string, std::string> empty;
auto it = term_types.find(sym);
if (it == term_types.end())
return empty;
else
return it->second;
}
const std::string & get_type(const symbol_t &sym) const {
switch (sym.get_type()) {
case SYMBOL_TYPE_NONTERM:
return get_nonterm_type(sym.get_value());
default:
return get_term_type(sym).first;
}
const grammar_t & get_grammar() const {
return grammar;
}
const std::set<std::string> & get_nonterminals() const {
@ -142,10 +109,6 @@ public:
return itemsets.size();
}
const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> & get_rules() const {
return rules;
}
const std::map<size_t, size_t> & get_reductions() const {
return reductions;
}
@ -158,24 +121,8 @@ public:
return gotos;
}
const std::string & get_header_block() const {
return header_block;
}
const std::string & get_source_block() const {
return source_block;
}
const std::vector<std::pair<std::string, std::string>> & get_extra_args() const {
return extra_args;
}
generator_t(const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> &rules0,
const std::map<std::string, std::string> &nonterm_types0,
const std::map<symbol_t, std::pair<std::string, std::string>> &term_types0,
const std::string &header_block0, const std::string &source_block0,
const std::vector<std::pair<std::string, std::string>> &extra_args0);
generator_t(const grammar_t &grammar0);
};
}

79
src/grammar.hpp Normal file
View file

@ -0,0 +1,79 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "rule.hpp"
#include <map>
namespace solar {
/**
   Plain data bundle describing a grammar: its rules, the type information
   attached to its symbols, and the verbatim blocks and extra arguments that
   are passed through to the generated output.
*/
struct grammar_t {
	/** The grammar's rules; other code refers to a rule by its index in this vector */
	std::vector<rule_t> rules;

	/** Type string for each nonterminal that has one, keyed by nonterminal name */
	std::map<std::string, std::string> nonterm_types;

	/**
	   Per-terminal pair keyed by the terminal symbol. get_type() treats
	   .first as the symbol's type string.
	   NOTE(review): .second appears to be the member name inside the token
	   value -- confirm against the code that fills this map.
	*/
	std::map<symbol_t, std::pair<std::string, std::string>> term_types;

	/** Text block associated with the generated header (may be empty) */
	std::string header_block;

	/** Text block associated with the generated source file (may be empty) */
	std::string source_block;

	/** Additional arguments, stored as pairs of strings in declaration order */
	std::vector<std::pair<std::string, std::string>> extra_args;


	/**
	   Looks up the type string registered for the nonterminal \p sym.

	   Returns a reference to a shared empty string when the nonterminal has
	   no entry in nonterm_types.
	*/
	const std::string & get_nonterm_type(const std::string &sym) const {
		static const std::string no_type;

		auto found = nonterm_types.find(sym);
		if (found != nonterm_types.end())
			return found->second;

		return no_type;
	}

	/**
	   Looks up the pair registered for the terminal \p sym.

	   Returns a reference to a shared pair of empty strings when the
	   terminal has no entry in term_types.
	*/
	const std::pair<std::string, std::string> & get_term_type(const symbol_t &sym) const {
		static const std::pair<std::string, std::string> no_type;

		auto found = term_types.find(sym);
		if (found != term_types.end())
			return found->second;

		return no_type;
	}

	/**
	   Returns the type string of an arbitrary symbol.

	   Nonterminals are resolved through get_nonterm_type() using the
	   symbol's value; every other symbol kind is resolved through
	   get_term_type(), of which only the first component is returned.
	*/
	const std::string & get_type(const symbol_t &sym) const {
		if (sym.get_type() == SYMBOL_TYPE_NONTERM)
			return get_nonterm_type(sym.get_value());

		return get_term_type(sym).first;
	}
};
}

View file

@ -34,8 +34,8 @@
namespace solar {
struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
item_t(const std::string &lhs)
: std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {}
item_t(const std::string &lhs, const std::vector<symbol_t> &rhs = std::vector<symbol_t>())
: std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, rhs, 0) {}
const std::string & get_lhs() const {
return std::get<0>(*this);

View file

@ -54,7 +54,7 @@ output_t::output_t(const generator_t *generator0, const char *header, const char
if (term.get_type() == SYMBOL_TYPE_TERM)
tokens.insert(std::make_pair(term.get_value(), tokens.size()));
symbol_values.insert(std::make_pair(term, "token." + generator->get_term_type(term).second));
symbol_values.insert(std::make_pair(term, "token." + generator->get_grammar().get_term_type(term).second));
}
}
@ -80,8 +80,8 @@ void output_t::emit_token_value() {
std::map<std::string, std::string> token_values;
for (const auto &term : generator->get_terminals()) {
const auto &type = generator->get_term_type(term);
for (const symbol_t &term : generator->get_terminals()) {
const auto &type = generator->get_grammar().get_term_type(term);
if (!type.first.empty())
token_values.insert(std::make_pair(type.second, type.first));
}
@ -95,8 +95,8 @@ void output_t::emit_token_value() {
void output_t::emit_header() {
std::fprintf(header_file, "#pragma once\n\n");
if (!generator->get_header_block().empty())
std::fprintf(header_file, "%s\n", generator->get_header_block().c_str());
if (!generator->get_grammar().header_block.empty())
std::fprintf(header_file, "%s\n", generator->get_grammar().header_block.c_str());
emit_tokens();
emit_token_value();
@ -108,19 +108,19 @@ void output_t::emit_header() {
std::fprintf(header_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix());
for (const auto &arg : generator->get_extra_args())
for (const auto &arg : generator->get_grammar().extra_args)
std::fprintf(header_file, ", %s %s", arg.first.c_str(), arg.second.c_str());
std::fprintf(header_file, ");\n");
}
void output_t::emit_reduction(unsigned rule_id) {
const auto &rule = generator->get_rules()[rule_id];
const rule_t &rule = generator->get_grammar().rules[rule_id];
std::fprintf(source_file, "static inline ");
const item_t &item = std::get<0>(rule);
const std::string &type = generator->get_nonterm_type(item.get_lhs());
const item_t &item = rule.item;
const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs());
if (type.empty())
std::fprintf(source_file, "void");
else
@ -129,8 +129,8 @@ void output_t::emit_reduction(unsigned rule_id) {
std::fprintf(source_file, " %sreduce_%u(", prefix(), rule_id);
bool empty = true;
for (unsigned i = 0; i < std::get<1>(rule).size(); i++) {
const std::string &var = std::get<1>(rule)[i];
for (unsigned i = 0; i < rule.variables.size(); i++) {
const std::string &var = rule.variables[i];
if (var.empty())
continue;
@ -138,12 +138,12 @@ void output_t::emit_reduction(unsigned rule_id) {
if (!empty)
std::fprintf(source_file, ", ");
std::fprintf(source_file, "%s %s", generator->get_type(item.get_rhs()[i]).c_str(), var.c_str());
std::fprintf(source_file, "%s %s", generator->get_grammar().get_type(item.get_rhs()[i]).c_str(), var.c_str());
empty = false;
}
for (const auto &arg : generator->get_extra_args()) {
for (const auto &arg : generator->get_grammar().extra_args) {
if (!empty)
std::fprintf(source_file, ", ");
@ -156,16 +156,16 @@ void output_t::emit_reduction(unsigned rule_id) {
std::fprintf(source_file, "void");
std::fprintf(source_file, ") {");
std::fprintf(source_file, "%s", std::get<2>(rule).c_str());
std::fprintf(source_file, "%s", rule.action.c_str());
std::fprintf(source_file, "}\n\n");
}
void output_t::emit_reductions() {
const auto &rules = generator->get_rules();
const auto &rules = generator->get_grammar().rules;
for (size_t i = 0; i < rules.size(); i++) {
if (!std::get<2>(rules[i]).empty())
if (!rules[i].action.empty())
emit_reduction(i);
}
}
@ -205,7 +205,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) {
std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(rhs.size()));
if (rule_id >= 0) {
const std::string &type = generator->get_nonterm_type(item.get_lhs());
const std::string &type = generator->get_grammar().get_nonterm_type(item.get_lhs());
std::fprintf(source_file, "\t\t\t");
if (!type.empty())
@ -213,7 +213,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) {
std::fprintf(source_file, "%sreduce_%i(", prefix(), rule_id);
bool empty = true;
const auto &vars = std::get<1>(generator->get_rules()[rule_id]);
const auto &vars = generator->get_grammar().rules[rule_id].variables;
for (unsigned i = 0; i < vars.size(); i++) {
const std::string &var = vars[i];
if (var.empty())
@ -226,7 +226,7 @@ void output_t::emit_state_reduce(const item_t &item, int rule_id) {
empty = false;
}
for (const auto &arg : generator->get_extra_args()) {
for (const auto &arg : generator->get_grammar().extra_args) {
if (!empty)
std::fprintf(source_file, ", ");
@ -277,8 +277,8 @@ void output_t::emit_state(unsigned i) {
emit_state_shift(i);
}
else {
const auto &rule = generator->get_rules()[it->second];
emit_state_reduce(std::get<0>(rule), std::get<2>(rule).empty() ? -1 : it->second);
const rule_t &rule = generator->get_grammar().rules[it->second];
emit_state_reduce(rule.item, rule.action.empty() ? -1 : it->second);
}
std::fprintf(source_file, "\t\t\tbreak;\n\n");
@ -305,14 +305,14 @@ void output_t::emit_header_include() {
void output_t::emit_source() {
emit_header_include();
if (!generator->get_source_block().empty())
std::fprintf(source_file, "%s\n\n", generator->get_source_block().c_str());
if (!generator->get_grammar().source_block.empty())
std::fprintf(source_file, "%s\n\n", generator->get_grammar().source_block.c_str());
std::fprintf(source_file, "typedef union %ssymbol_value {\n", prefix());
std::fprintf(source_file, "\t%stoken_value_t token;\n", prefix());
for (const auto &nonterm : generator->get_nonterminals()) {
const std::string &type = generator->get_nonterm_type(nonterm);
const std::string &type = generator->get_grammar().get_nonterm_type(nonterm);
if (!type.empty())
std::fprintf(source_file, "\t%s symbol_%s;\n", type.c_str(), nonterm.c_str());
@ -344,7 +344,7 @@ void output_t::emit_source() {
emit_reductions();
std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value", prefix(), prefix(), prefix());
for (const auto &arg : generator->get_extra_args())
for (const auto &arg : generator->get_grammar().extra_args)
std::fprintf(source_file, ", %s %s", arg.first.c_str(), arg.second.c_str());
std::fprintf(source_file, ") {\n");

View file

@ -30,7 +30,7 @@
namespace solar {
void parser_state_t::new_rule(const char *nonterm) {
if (rules.empty()) {
if (grammar.rules.empty()) {
// start rule
add_rule_nonterminal(nonterm);
add_rule();
@ -56,7 +56,7 @@ void parser_state_t::add_rule_terminal(unsigned char term) {
}
void parser_state_t::add_rule(const std::string &action) {
rules.emplace_back(std::move(current), std::move(current_vars), action);
grammar.rules.emplace_back(rule_t{std::move(current), std::move(current_vars), action});
}
void parser_state_t::add_rule_var(const char *var) {
@ -72,7 +72,7 @@ void parser_state_t::add_type_terminal(const char *term) {
}
void parser_state_t::set_type_nonterminal(const char *type) {
nonterm_types.insert(std::make_pair(current_var, type));
grammar.nonterm_types.insert(std::make_pair(current_var, type));
}
void parser_state_t::set_type_terminal(const char *type) {
@ -80,7 +80,7 @@ void parser_state_t::set_type_terminal(const char *type) {
}
void parser_state_t::set_type_terminal_name(const char *name) {
term_types.insert(std::make_pair(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name)));
grammar.term_types.insert(std::make_pair(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name)));
}
}

View file

@ -26,63 +26,35 @@
#pragma once
#include "item.hpp"
#include <map>
#include "grammar.hpp"
namespace solar {
class parser_state_t {
private:
std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> rules;
std::map<std::string, std::string> nonterm_types;
std::map<symbol_t, std::pair<std::string, std::string>> term_types;
grammar_t grammar;
item_t current;
std::vector<std::string> current_vars;
std::string current_var;
std::string current_type;
std::string header_block;
std::string source_block;
std::string current_extra_arg;
std::vector<std::pair<std::string, std::string>> extra_args;
public:
parser_state_t() : current("") {}
const std::vector<std::tuple<item_t, std::vector<std::string>, std::string>> & get_rules() const {
return rules;
}
const std::map<std::string, std::string> & get_nonterm_types() const {
return nonterm_types;
}
const std::map<symbol_t, std::pair<std::string, std::string>> & get_term_types() const {
return term_types;
}
const std::string & get_header_block() const {
return header_block;
const grammar_t & get_grammar() const {
return grammar;
}
void set_header_block(const char *value) {
header_block = value;
}
const std::string & get_source_block() const {
return source_block;
grammar.header_block = value;
}
void set_source_block(const char *value) {
source_block = value;
}
const std::vector<std::pair<std::string, std::string>> & get_extra_args() const {
return extra_args;
grammar.source_block = value;
}
void add_extra_arg(const char *type) {
@ -90,7 +62,7 @@ public:
}
void set_extra_arg_name(const char *name) {
extra_args.emplace_back(current_extra_arg, name);
grammar.extra_args.emplace_back(current_extra_arg, name);
}

40
src/rule.hpp Normal file
View file

@ -0,0 +1,40 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "item.hpp"
namespace solar {
/*
  A single grammar rule: an item plus the information attached to it.

  This is a plain aggregate with no constructors, so it can be
  brace-initialized positionally as { item, variables, action }; keep the
  member order stable.
*/
struct rule_t {
/* The item (see item.hpp) this rule is built around */
item_t item;
/*
  Variable names attached to the rule.
  NOTE(review): presumably one entry per right-hand-side symbol of the item,
  with an empty string for a symbol that has no name -- confirm against the
  code that fills this vector.
*/
std::vector<std::string> variables;
/*
  Action text attached to the rule.
  NOTE(review): presumably an empty string means the rule has no action --
  confirm against the consumers of this field.
*/
std::string action;
};
}

View file

@ -87,12 +87,7 @@ int main(int argc, char *argv[]) {
if (!read_grammar(argv[1], &state))
return 1;
generator_t generator(state.get_rules(),
state.get_nonterm_types(),
state.get_term_types(),
state.get_header_block(),
state.get_source_block(),
state.get_extra_args());
generator_t generator(state.get_grammar());
output_t output(&generator, argv[3], argv[2]);
output.write();