From 96dd0ebd2618df6ed0c1acf795e9a7054592b566 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 5 Apr 2015 02:28:52 +0200 Subject: Add support for simple reduce actions --- src/generator.cpp | 6 ++-- src/generator.hpp | 6 ++-- src/lex.cpp | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++-- src/lex.hpp | 8 ++--- src/output.cpp | 40 ++++++++++++++++++++----- src/output.hpp | 4 ++- src/parser.cpp | 5 ++++ src/parser_state.cpp | 4 +-- src/parser_state.hpp | 6 ++-- 9 files changed, 137 insertions(+), 26 deletions(-) diff --git a/src/generator.cpp b/src/generator.cpp index f336772..6198aba 100644 --- a/src/generator.cpp +++ b/src/generator.cpp @@ -36,7 +36,7 @@ std::set generator_t::get_set(const std::string &nonterm) { auto entries = nonterms.equal_range(nonterm); for (auto entry = entries.first; entry != entries.second; ++entry) - set.insert(rules[entry->second]); + set.insert(rules[entry->second].first); return set; } @@ -117,9 +117,9 @@ void generator_t::generate_itemsets() { } } -generator_t::generator_t(const std::vector &rules0) : rules(rules0) { +generator_t::generator_t(const std::vector> &rules0) : rules(rules0) { for (size_t i = 0; i < rules.size(); i++) { - item_t rule = rules[i]; + item_t rule = rules[i].first; nonterms.emplace(rule.get_lhs(), i); diff --git a/src/generator.hpp b/src/generator.hpp index 4c5f9af..8178bbd 100644 --- a/src/generator.hpp +++ b/src/generator.hpp @@ -36,7 +36,7 @@ namespace solar { class generator_t { private: - std::vector rules; + std::vector> rules; std::map rule_ids; std::multimap nonterms; @@ -67,7 +67,7 @@ public: return itemsets.size(); } - const std::vector & get_rules() const { + const std::vector> & get_rules() const { return rules; } @@ -83,7 +83,7 @@ public: return gotos; } - generator_t(const std::vector &rules0); + generator_t(const std::vector> &rules0); }; } diff --git a/src/lex.cpp b/src/lex.cpp index 7a3fb6a..70362a7 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -243,6 +243,85 @@ int lex_t::lex_keyword(parser_value_t *value) { return ret->token; } +int lex_t::unterminated_block(parser_value_t *value) { + if (ferror(file)) + return io_error(value); + + value->error = "unterminated code block"; + return -1; +} + +int lex_t::lex_block(parser_value_t *value) { + size_t parens = 0; + bool line_comment = false; + bool block_comment = false; + bool str = false; + + size_t pos = 0; + size_t len = 1024; + char *buf = static_cast(std::malloc(len)); + + char prev = 0; + + while (true) { + if (!next(true)) { + std::free(buf); + return unterminated_block(value); + } + + char cur = current(); + + if (line_comment) { + if (cur == '\n' || cur == '\r') + line_comment = false; + } + else if (block_comment) { + if (prev == '*' && cur == '/') + block_comment = false; + } + else if (str) { + if (prev != '\\' && cur == '"') + str = false; + } + else { + if (cur == '{') { + parens++; + } + else if (cur == '}') { + if (!parens) + break; + + parens--; + } + else if (cur == '"') { + str = true; + } + else if (prev == '/' && cur == '/') { + line_comment = true; + } + else if (prev == '/' && cur == '*') { + block_comment = true; + } + } + + if (pos >= len) { + len *= 2; + buf = static_cast(std::realloc(buf, len)); + } + + buf[pos++] = cur; + prev = cur; + } + + value->str = strndup(buf, pos); + std::free(buf); + + next(true); + consume(true); + + return TOK_BLOCK; +} + int lex_t::lex_symbol(parser_value_t *value, bool terminal) { if (needspace) return syntax_error(value); @@ -293,8 +372,6 @@ int lex_t::lex(parser_value_t *value) { case ';': case ':': - case '{': - case '}': case '|': case '=': token = current(); @@ -341,6 +418,9 @@ int lex_t::lex(parser_value_t *value) { return TOK_CHAR; + case '{': + return lex_block(value); + //case '"': //return lex_string(value); diff --git a/src/lex.hpp b/src/lex.hpp index 36e3c92..6d193ea 100644 --- a/src/lex.hpp +++ b/src/lex.hpp @@ -55,7 +55,7 @@ private: size_t start; size_t end; size_t tok_len; - char buffer[65536]; + char buffer[1024]; bool advance(); @@ -65,13 +65,13 @@ private: int io_error(parser_value_t *value); int syntax_error(parser_value_t *value); int consume_comment(parser_value_t *value); + int unterminated_block(parser_value_t *value); //int unterminated_string(parser_value_t *value); - int lex_string(parser_value_t *value); - int lex_address(parser_value_t *value); - int lex_float(parser_value_t *value); + //int lex_string(parser_value_t *value); int lex_number(parser_value_t *value); int lex_keyword(parser_value_t *value); + int lex_block(parser_value_t *value); int lex_symbol(parser_value_t *value, bool terminal); char current() { diff --git a/src/output.cpp b/src/output.cpp index 0901b29..1b4d6d8 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -64,8 +64,8 @@ void output_t::emit_tokens() { } void output_t::emit_token_value() { - std::fprintf(source_file, "typedef struct %stoken_value {\n", prefix()); - std::fprintf(source_file, "} %stoken_value_t;\n\n", prefix()); + std::fprintf(header_file, "typedef struct %stoken_value {\n", prefix()); + std::fprintf(header_file, "} %stoken_value_t;\n\n", prefix()); } void output_t::emit_header() { @@ -75,6 +75,22 @@ void output_t::emit_header() { std::fprintf(header_file, "typedef struct %scontext %scontext_t;\n", prefix(), prefix()); } +void output_t::emit_reduction(unsigned rule_id, const std::string &action) { + std::fprintf(source_file, "void %sreduce_%u(void) {", prefix(), rule_id); + std::fprintf(source_file, "%s", action.c_str()); + std::fprintf(source_file, "}\n\n"); + +} + +void output_t::emit_reductions() { + const auto &rules = generator->get_rules(); + + for (size_t i = 0; i < rules.size(); i++) { + if (!rules[i].second.empty()) + emit_reduction(i, rules[i].second); + } +} + void output_t::emit_state_shift(unsigned i) { std::fprintf(source_file, "\t\t\tswitch (token) {\n"); @@ -103,10 +119,13 @@ void output_t::emit_state_shift(unsigned i) { std::fprintf(source_file, "\t\t\t}\n"); } -void output_t::emit_state_reduce(const item_t &item) { +void output_t::emit_state_reduce(const item_t &item, int rule_id) { if (item.get_rhs().size()) std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(item.get_rhs().size())); + if (rule_id >= 0) + std::fprintf(source_file, "\t\t\t%sreduce_%i();\n", prefix(), rule_id); + std::vector> gotos; for (size_t i = 0; i < generator->get_state_count(); i++) { @@ -142,10 +161,13 @@ void output_t::emit_state(unsigned i) { std::fprintf(source_file, "\t\tcase %u:\n", i); auto it = generator->get_reductions().find(i); - if (it == generator->get_reductions().end()) + if (it == generator->get_reductions().end()) { emit_state_shift(i); - else - emit_state_reduce(generator->get_rules()[it->second]); + } + else { + const auto &rule = generator->get_rules()[it->second]; + emit_state_reduce(rule.first, rule.second.empty() ? -1 : it->second); + } std::fprintf(source_file, "\t\t\tbreak;\n\n"); } @@ -160,10 +182,12 @@ void output_t::emit_source() { std::fprintf(source_file, "\tunsigned state;\n"); std::fprintf(source_file, "} %scontext_state_t;\n\n", prefix()); - std::fprintf(source_file, "typedef struct %scontext {\n", prefix()); + std::fprintf(source_file, "struct %scontext {\n", prefix()); std::fprintf(source_file, "\tunsigned top;\n"); std::fprintf(source_file, "\t%scontext_state_t stack[%u];\n", prefix(), stack_size); - std::fprintf(source_file, "} %scontext_t;\n\n", prefix()); + std::fprintf(source_file, "};\n\n"); + + emit_reductions(); std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value) {\n", prefix(), prefix(), prefix()); std::fprintf(source_file, "\twhile (1) {\n"); diff --git a/src/output.hpp b/src/output.hpp index f129716..fe4bc19 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -59,8 +59,10 @@ private: void emit_token_value(); void emit_header(); + void emit_reduction(unsigned rule_id, const std::string &action); + void emit_reductions(); void emit_state_shift(unsigned i); - void emit_state_reduce(const item_t &item); + void emit_state_reduce(const item_t &item, int rule_id); void emit_state(unsigned i); void emit_states(); void emit_source(); diff --git a/src/parser.cpp b/src/parser.cpp index d21a196..a1ee376 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -98,6 +98,11 @@ int parser_push(parser_t *parser, int token, const parser_value_t *value, parser return 1; case TOK_BLOCK: + state->add_rule(value->str); + free(value->str); + parser->state = STATE_INIT; + return 1; + case ';': state->add_rule(); parser->state = STATE_INIT; diff --git a/src/parser_state.cpp b/src/parser_state.cpp index f7b9574..92584a6 100644 --- a/src/parser_state.cpp +++ b/src/parser_state.cpp @@ -52,8 +52,8 @@ void parser_state_t::add_rule_terminal(unsigned char term) { current.get_rhs().emplace_back(symbol_t::make_char(term)); } -void parser_state_t::add_rule() { - rules.emplace_back(current); +void parser_state_t::add_rule(const std::string &action) { + rules.emplace_back(current, action); } } diff --git a/src/parser_state.hpp b/src/parser_state.hpp index 3798586..a120423 100644 --- a/src/parser_state.hpp +++ b/src/parser_state.hpp @@ -33,14 +33,14 @@ namespace solar { class parser_state_t { private: - std::vector rules; + std::vector> rules; item_t current; public: parser_state_t() : current("") {} - const std::vector & get_rules() const { + const std::vector> & get_rules() const { return rules; } @@ -48,7 +48,7 @@ public: void add_rule_nonterminal(const char *nonterm); void add_rule_terminal(const char *term); void add_rule_terminal(unsigned char term); - void add_rule(); + void add_rule(const std::string &action = ""); }; } -- cgit v1.2.3