From d6deff997e4f882d9fa46b109fd32e713f054a6d Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 6 Apr 2015 18:50:03 +0200 Subject: Implement a lot of symbol value support --- src/generator.cpp | 10 +++-- src/generator.hpp | 46 +++++++++++++++++-- src/lex.cpp | 37 ++++++++------- src/lex.hpp | 2 +- src/output.cpp | 111 +++++++++++++++++++++++++++++++++++++++------ src/output.hpp | 4 +- src/parser.cpp | 125 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/parser.hpp | 6 ++- src/parser_state.cpp | 33 ++++++++++++-- src/parser_state.hpp | 26 ++++++++++- src/solar.cpp | 2 +- 11 files changed, 353 insertions(+), 49 deletions(-) diff --git a/src/generator.cpp b/src/generator.cpp index d63b859..5b9766e 100644 --- a/src/generator.cpp +++ b/src/generator.cpp @@ -36,7 +36,7 @@ std::set generator_t::get_set(const std::string &nonterm) { auto entries = nonterms.equal_range(nonterm); for (auto entry = entries.first; entry != entries.second; ++entry) - set.insert(rules[entry->second].first); + set.insert(std::get<0>(rules[entry->second])); return set; } @@ -117,10 +117,14 @@ void generator_t::generate_itemsets() { } } -generator_t::generator_t(const std::vector> &rules0) : rules(rules0) { +generator_t::generator_t(const std::vector, std::string>> &rules0, + const std::map &nonterm_types0, + const std::map> &term_types0) + : rules(rules0), nonterm_types(nonterm_types0), term_types(term_types0) { for (size_t i = 0; i < rules.size(); i++) { - item_t rule = rules[i].first; + item_t rule = std::get<0>(rules[i]); + nonterminals.emplace(rule.get_lhs()); nonterms.emplace(rule.get_lhs(), i); while (rule.has_next()) { diff --git a/src/generator.hpp b/src/generator.hpp index 47ef6bf..bed048f 100644 --- a/src/generator.hpp +++ b/src/generator.hpp @@ -44,10 +44,11 @@ public: }; private: - std::vector> rules; + std::vector, std::string>> rules; std::map rule_ids; std::multimap nonterms; + std::set nonterminals; std::set terminals; std::multimap items; @@ -59,6 +60,9 @@ private: std::set shift_conflicts; + std::map nonterm_types; + std::map> term_types; + void close_set(std::set *set); std::set get_set(const std::string &nonterm); @@ -90,6 +94,40 @@ private: void generate_itemsets(); public: + const std::string & get_nonterm_type(const std::string &sym) const { + static const std::string empty; + + auto it = nonterm_types.find(sym); + if (it == nonterm_types.end()) + return empty; + else + return it->second; + } + + const std::pair & get_term_type(const symbol_t &sym) const { + static const std::pair empty; + + auto it = term_types.find(sym); + if (it == term_types.end()) + return empty; + else + return it->second; + } + + const std::string & get_type(const symbol_t &sym) const { + switch (sym.get_type()) { + case SYMBOL_TYPE_NONTERM: + return get_nonterm_type(sym.get_value()); + + default: + return get_term_type(sym).first; + } + } + + const std::set & get_nonterminals() const { + return nonterminals; + } + const std::set & get_terminals() const { return terminals; } @@ -98,7 +136,7 @@ public: return itemsets.size(); } - const std::vector> & get_rules() const { + const std::vector, std::string>> & get_rules() const { return rules; } @@ -114,7 +152,9 @@ public: return gotos; } - generator_t(const std::vector> &rules0); + generator_t(const std::vector, std::string>> &rules0, + const std::map &nonterm_types0, + const std::map> &term_types0); }; } diff --git a/src/lex.cpp b/src/lex.cpp index 851ac87..3adfb5b 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -42,6 +42,7 @@ struct keyword_t { /* the keyword list must be sorted */ static const keyword_t keywords[] = { + {"%type", TOK_TYPE}, }; static int compare_keywords(const void *v1, const void *v2) { @@ -330,24 +331,21 @@ int lex_t::lex_block(parser_value_t *value) { return TOK_BLOCK; } -int lex_t::lex_symbol(parser_value_t *value, bool terminal) { +int lex_t::lex_symbol(parser_value_t *value) { if (needspace) return syntax_error(value); - while (next(false)) { - char cur = current(); + bool uc = true; + bool lc = true; - switch (cur) { + do { + switch (current()) { case 'A' ... 'Z': - if (!terminal) - break; - + lc = false; continue; case 'a' ... 'z': - if (terminal) - break; - + uc = false; continue; case '0' ... '9': @@ -356,10 +354,16 @@ int lex_t::lex_symbol(parser_value_t *value, bool terminal) { } break; - } + } while (next(false)); value->str = get_token(); - return terminal ? TOK_TERM : TOK_NONTERM; + + if (uc) + return TOK_SYMBOL_UC; + else if (lc) + return TOK_SYMBOL_LC; + else + return TOK_SYMBOL; } int lex_t::lex(parser_value_t *value) { @@ -382,6 +386,8 @@ int lex_t::lex(parser_value_t *value) { case ':': case '|': case '=': + case '(': + case ')': token = current(); next(true); consume(false); @@ -436,10 +442,11 @@ int lex_t::lex(parser_value_t *value) { return lex_number(value); case 'a' ... 'z': - return lex_symbol(value, false); - case 'A' ... 'Z': - return lex_symbol(value, true); + return lex_symbol(value); + + case '%': + return lex_keyword(value); default: return syntax_error(value); diff --git a/src/lex.hpp b/src/lex.hpp index 6d193ea..dcf8b16 100644 --- a/src/lex.hpp +++ b/src/lex.hpp @@ -72,7 +72,7 @@ private: int lex_number(parser_value_t *value); int lex_keyword(parser_value_t *value); int lex_block(parser_value_t *value); - int lex_symbol(parser_value_t *value, bool terminal); + int lex_symbol(parser_value_t *value); char current() { return buffer[start + tok_len]; diff --git a/src/output.cpp b/src/output.cpp index d4f86a9..7e9029b 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -45,9 +45,14 @@ output_t::output_t(const generator_t *generator0, const char *header, const char if (!source_file) throw std::system_error(errno, std::generic_category(), "unable to open source output file for writing"); - for (const auto &token : generator->get_terminals()) { - if (token.get_type() == SYMBOL_TYPE_TERM) - tokens.emplace(token.get_value(), tokens.size()); + for (const std::string &nonterm : generator->get_nonterminals()) + symbol_values.emplace(symbol_t::make_nonterm(nonterm.c_str()), "symbol_" + nonterm); + + for (const symbol_t &term : generator->get_terminals()) { + if (term.get_type() == SYMBOL_TYPE_TERM) + tokens.emplace(term.get_value(), tokens.size()); + + symbol_values.emplace(term, "token." + generator->get_term_type(term).second); } } @@ -65,6 +70,18 @@ void output_t::emit_tokens() { void output_t::emit_token_value() { std::fprintf(header_file, "typedef struct %stoken_value {\n", prefix()); + + std::map token_values; + + for (const auto &term : generator->get_terminals()) { + const auto &type = generator->get_term_type(term); + if (!type.first.empty()) + token_values.emplace(type.second, type.first); + } + + for (const auto &value : token_values) + std::fprintf(header_file, "\t%s %s;\n", value.second.c_str(), value.first.c_str()); + std::fprintf(header_file, "} %stoken_value_t;\n\n", prefix()); } @@ -75,9 +92,40 @@ void output_t::emit_header() { std::fprintf(header_file, "typedef struct %scontext %scontext_t;\n", prefix(), prefix()); } -void output_t::emit_reduction(unsigned rule_id, const std::string &action) { - std::fprintf(source_file, "static inline void %sreduce_%u(void) {", prefix(), rule_id); - std::fprintf(source_file, "%s", action.c_str()); +void output_t::emit_reduction(unsigned rule_id) { + const auto &rule = generator->get_rules()[rule_id]; + + std::fprintf(source_file, "static inline "); + + const item_t &item = std::get<0>(rule); + const std::string &type = generator->get_nonterm_type(item.get_lhs()); + if (type.empty()) + std::fprintf(source_file, "void"); + else + std::fprintf(source_file, "%s", type.c_str()); + + std::fprintf(source_file, " %sreduce_%u(", prefix(), rule_id); + + bool empty = true; + for (unsigned i = 0; i < std::get<1>(rule).size(); i++) { + const std::string &var = std::get<1>(rule)[i]; + + if (var.empty()) + continue; + + if (!empty) + std::fprintf(source_file, ", "); + + std::fprintf(source_file, "%s %s", generator->get_type(item.get_rhs()[i]).c_str(), var.c_str()); + + empty = false; + } + + if (empty) + std::fprintf(source_file, "void"); + + std::fprintf(source_file, ") {"); + std::fprintf(source_file, "%s", std::get<2>(rule).c_str()); std::fprintf(source_file, "}\n\n"); } @@ -86,8 +134,8 @@ void output_t::emit_reductions() { const auto &rules = generator->get_rules(); for (size_t i = 0; i < rules.size(); i++) { - if (!rules[i].second.empty()) - emit_reduction(i, rules[i].second); + if (!std::get<2>(rules[i]).empty()) + emit_reduction(i); } } @@ -110,6 +158,7 @@ void output_t::emit_state_shift(unsigned i) { std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str()); std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second)); + std::fprintf(source_file, "\t\t\t\tparser->stack[parser->top].value.token = *value;\n"); std::fprintf(source_file, "\t\t\t\treturn 1;\n\n"); } @@ -120,11 +169,34 @@ void output_t::emit_state_shift(unsigned i) { } void output_t::emit_state_reduce(const item_t &item, int rule_id) { - if (item.get_rhs().size()) - std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(item.get_rhs().size())); + const auto &rhs = item.get_rhs(); + if (rhs.size()) + std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(rhs.size())); + + if (rule_id >= 0) { + const std::string &type = generator->get_nonterm_type(item.get_lhs()); + + std::fprintf(source_file, "\t\t\t"); + if (!type.empty()) + std::fprintf(source_file, "parser->stack[parser->top].value.symbol_%s = ", item.get_lhs().c_str()); + std::fprintf(source_file, "%sreduce_%i(", prefix(), rule_id); + + bool empty = true; + const auto &vars = std::get<1>(generator->get_rules()[rule_id]); + for (unsigned i = 0; i < vars.size(); i++) { + const std::string &var = vars[i]; + if (var.empty()) + continue; - if (rule_id >= 0) - std::fprintf(source_file, "\t\t\t%sreduce_%i();\n", prefix(), rule_id); + if (!empty) + std::fprintf(source_file, ", "); + + std::fprintf(source_file, "parser->stack[parser->top + %u].value.%s", i, symbol_values[rhs[i]].c_str()); + empty = false; + } + + std::fprintf(source_file, ");\n"); + } std::vector> gotos; @@ -166,7 +238,7 @@ void output_t::emit_state(unsigned i) { } else { const auto &rule = generator->get_rules()[it->second]; - emit_state_reduce(rule.first, rule.second.empty() ? -1 : it->second); + emit_state_reduce(std::get<0>(rule), std::get<2>(rule).empty() ? -1 : it->second); } std::fprintf(source_file, "\t\t\tbreak;\n\n"); @@ -178,8 +250,21 @@ void output_t::emit_states() { } void output_t::emit_source() { + std::fprintf(source_file, "typedef union %ssymbol_value {\n", prefix()); + std::fprintf(source_file, "\t%stoken_value_t token;\n", prefix()); + + for (const auto &nonterm : generator->get_nonterminals()) { + const std::string &type = generator->get_nonterm_type(nonterm); + + if (!type.empty()) + std::fprintf(source_file, "\t%s symbol_%s;\n", type.c_str(), nonterm.c_str()); + } + + std::fprintf(source_file, "} %ssymbol_value_t;\n\n", prefix()); + std::fprintf(source_file, "typedef struct %scontext_state {\n", prefix()); std::fprintf(source_file, "\tunsigned state;\n"); + std::fprintf(source_file, "\t%ssymbol_value_t value;\n", prefix()); std::fprintf(source_file, "} %scontext_state_t;\n\n", prefix()); std::fprintf(source_file, "struct %scontext {\n", prefix()); diff --git a/src/output.hpp b/src/output.hpp index fe4bc19..439e754 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -47,6 +47,8 @@ private: std::map tokens; std::map nonterms; + std::map symbol_values; + const char * prefix() const { return prefix_str.c_str(); } @@ -59,7 +61,7 @@ private: void emit_token_value(); void emit_header(); - void emit_reduction(unsigned rule_id, const std::string &action); + void emit_reduction(unsigned rule_id); void emit_reductions(); void emit_state_shift(unsigned i); void emit_state_reduce(const item_t &item, int rule_id); diff --git a/src/parser.cpp b/src/parser.cpp index a1ee376..9d2fbce 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -36,6 +36,13 @@ enum parser_state { STATE_RULE_BAR, STATE_RULE_EQUAL, STATE_RULE, + STATE_RULE_VAR_PRE, + STATE_RULE_VAR, + STATE_RULE_VAR_POST, + STATE_TYPE, + STATE_TYPE_NONTERM, + STATE_TYPE_TERM, + STATE_TYPE_TERM_BLOCK, }; struct parser { @@ -53,12 +60,16 @@ int parser_push(parser_t *parser, int token, const parser_value_t *value, parser switch (parser->state) { case STATE_INIT: switch (token) { - case TOK_NONTERM: + case TOK_SYMBOL_LC: parser->state = STATE_RULE_BAR; state->new_rule(value->str); free(value->str); return 1; + case TOK_TYPE: + parser->state = STATE_TYPE; + return 1; + case 0: return 0; } @@ -83,12 +94,12 @@ int parser_push(parser_t *parser, int token, const parser_value_t *value, parser case STATE_RULE: switch (token) { - case TOK_NONTERM: + case TOK_SYMBOL_LC: state->add_rule_nonterminal(value->str); free(value->str); return 1; - case TOK_TERM: + case TOK_SYMBOL_UC: state->add_rule_terminal(value->str); free(value->str); return 1; @@ -103,6 +114,10 @@ int parser_push(parser_t *parser, int token, const parser_value_t *value, parser parser->state = STATE_INIT; return 1; + case '(': + parser->state = STATE_RULE_VAR_PRE; + return 1; + case ';': state->add_rule(); parser->state = STATE_INIT; @@ -110,11 +125,111 @@ int parser_push(parser_t *parser, int token, const parser_value_t *value, parser } break; + + case STATE_RULE_VAR_PRE: + switch (token) { + case TOK_SYMBOL: + case TOK_SYMBOL_UC: + case TOK_SYMBOL_LC: + state->add_rule_var(value->str); + free(value->str); + parser->state = STATE_RULE_VAR; + return 1; + } + + break; + + case STATE_RULE_VAR: + if (token == ')') { + parser->state = STATE_RULE; + return 1; + } + + break; + + case STATE_RULE_VAR_POST: + switch (token) { + case TOK_SYMBOL_LC: + state->add_rule_nonterminal(value->str); + free(value->str); + return 1; + + case TOK_SYMBOL_UC: + state->add_rule_terminal(value->str); + free(value->str); + return 1; + + case TOK_CHAR: + state->add_rule_terminal(value->number); + return 1; + + case TOK_BLOCK: + state->add_rule(value->str); + free(value->str); + parser->state = STATE_INIT; + return 1; + + case ';': + state->add_rule(); + parser->state = STATE_INIT; + return 1; + } + + break; + + case STATE_TYPE: + switch (token) { + case TOK_SYMBOL_LC: + state->add_type_nonterminal(value->str); + free(value->str); + parser->state = STATE_TYPE_NONTERM; + return 1; + + case TOK_SYMBOL_UC: + state->add_type_terminal(value->str); + free(value->str); + parser->state = STATE_TYPE_TERM; + return 1; + } + + break; + + case STATE_TYPE_NONTERM: + if (token == TOK_BLOCK) { + state->set_type_nonterminal(value->str); + free(value->str); + parser->state = STATE_INIT; + return 1; + } + + break; + + case STATE_TYPE_TERM: + if (token == TOK_BLOCK) { + state->set_type_terminal(value->str); + free(value->str); + parser->state = STATE_TYPE_TERM_BLOCK; + return 1; + } + + break; + + case STATE_TYPE_TERM_BLOCK: + switch (token) { + case TOK_SYMBOL: + case TOK_SYMBOL_UC: + case TOK_SYMBOL_LC: + state->set_type_terminal_name(value->str); + free(value->str); + parser->state = STATE_INIT; + return 1; + } } switch (token) { - case TOK_NONTERM: - case TOK_TERM: + case TOK_SYMBOL: + case TOK_SYMBOL_UC: + case TOK_SYMBOL_LC: case TOK_CHAR: case TOK_BLOCK: free(value->str);; diff --git a/src/parser.hpp b/src/parser.hpp index 552dc37..f677bf1 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -34,11 +34,13 @@ namespace solar { enum parser_token_t { - TOK_TERM = 256, - TOK_NONTERM, + TOK_SYMBOL = 256, + TOK_SYMBOL_UC, + TOK_SYMBOL_LC, TOK_BLOCK, TOK_CHAR, TOK_UINT, + TOK_TYPE, }; typedef struct parser_value { diff --git a/src/parser_state.cpp b/src/parser_state.cpp index 92584a6..dfbe8a5 100644 --- a/src/parser_state.cpp +++ b/src/parser_state.cpp @@ -32,28 +32,55 @@ namespace solar { void parser_state_t::new_rule(const char *nonterm) { if (rules.empty()) { // start rule - current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm)); + add_rule_nonterminal(nonterm); add_rule(); } current = item_t(nonterm); - + current_vars = std::vector(); } void parser_state_t::add_rule_nonterminal(const char *nonterm) { current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm)); + current_vars.emplace_back(); } void parser_state_t::add_rule_terminal(const char *term) { current.get_rhs().emplace_back(symbol_t::make_term(term)); + current_vars.emplace_back(); } void parser_state_t::add_rule_terminal(unsigned char term) { current.get_rhs().emplace_back(symbol_t::make_char(term)); + current_vars.emplace_back(); } void parser_state_t::add_rule(const std::string &action) { - rules.emplace_back(current, action); + rules.emplace_back(std::move(current), std::move(current_vars), action); +} + +void parser_state_t::add_rule_var(const char *var) { + current_vars.back() = var; +} + +void parser_state_t::add_type_nonterminal(const char *nonterm) { + current_var = nonterm; +} + +void parser_state_t::add_type_terminal(const char *term) { + current_var = term; +} + +void parser_state_t::set_type_nonterminal(const char *type) { + nonterm_types.emplace(current_var, type); +} + +void parser_state_t::set_type_terminal(const char *type) { + current_type = type; +} + +void parser_state_t::set_type_terminal_name(const char *name) { + term_types.emplace(symbol_t::make_term(current_var.c_str()), std::make_pair(current_type, name)); } } diff --git a/src/parser_state.hpp b/src/parser_state.hpp index a120423..882fec2 100644 --- a/src/parser_state.hpp +++ b/src/parser_state.hpp @@ -28,27 +28,49 @@ #include "item.hpp" +#include + namespace solar { class parser_state_t { private: - std::vector> rules; + std::vector, std::string>> rules; + std::map nonterm_types; + std::map> term_types; item_t current; + std::vector current_vars; + std::string current_var; + std::string current_type; public: parser_state_t() : current("") {} - const std::vector> & get_rules() const { + const std::vector, std::string>> & get_rules() const { return rules; } + const std::map & get_nonterm_types() const { + return nonterm_types; + } + + const std::map> & get_term_types() const { + return term_types; + } + void new_rule(const char *nonterm); void add_rule_nonterminal(const char *nonterm); void add_rule_terminal(const char *term); void add_rule_terminal(unsigned char term); void add_rule(const std::string &action = ""); + void add_rule_var(const char *var); + + void add_type_nonterminal(const char *nonterm); + void add_type_terminal(const char *term); + void set_type_nonterminal(const char *type); + void set_type_terminal(const char *type); + void set_type_terminal_name(const char *name); }; } diff --git a/src/solar.cpp b/src/solar.cpp index c2e1545..8d876d0 100644 --- a/src/solar.cpp +++ b/src/solar.cpp @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) { if (!read_grammar(argv[1], &state)) return 1; - generator_t generator(state.get_rules()); + generator_t generator(state.get_rules(), state.get_nonterm_types(), state.get_term_types()); output_t output(&generator, "/dev/stdout", "/dev/stdout"); output.write(); -- cgit v1.2.3