From aeaeba2d7362a055c5c78f410a743ea977d8cd36 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 10 Apr 2015 19:00:43 +0200 Subject: Add support for escape sequences in character symbols --- src/lex.cpp | 61 +++++++++++++++++++++++++++++++++++++++++++++++++----- src/output.cpp | 42 +++++++++++++++++++++++++++++++++++++ src/output.hpp | 2 ++ src/output_lr0.cpp | 6 +----- src/output_slr.cpp | 15 ++------------ 5 files changed, 103 insertions(+), 23 deletions(-) diff --git a/src/lex.cpp b/src/lex.cpp index 5064dd7..3058232 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -337,13 +337,67 @@ int lex_t::lex(parse_token_value_t *value) { if (!next(true)) return syntax_error(value); - value->c = current(); + if (current() == '\\') { + if (!next(true)) + return syntax_error(value); + + switch (current()) { + case 'a': + value->c = '\a'; + break; + + case 'b': + value->c = '\b'; + break; + + case 'f': + value->c = '\f'; + break; + + case 'n': + value->c = '\n'; + break; + + case 'r': + value->c = '\r'; + break; + + case 't': + value->c = '\t'; + break; + + case 'v': + value->c = '\v'; + break; + + case '\\': + value->c = '\\'; + break; + + case '\'': + value->c = '\''; + break; + + case '"': + value->c = '"'; + break; + + case '?': + value->c = '?'; + break; + + default: + return syntax_error(value); + } + } + else { + value->c = current(); + } if (!next(true) || current() != '\'') return syntax_error(value); next(true); - consume(false); return TOK_CHAR; @@ -354,9 +408,6 @@ int lex_t::lex(parse_token_value_t *value) { case '"': return lex_string(value); - //case '0' ... '9': - //return lex_number(value); - case 'a' ... 'z': case 'A' ... 'Z': return lex_symbol(value); diff --git a/src/output.cpp b/src/output.cpp index 7f6e5c8..9c30443 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -64,6 +64,48 @@ void output_t::initialize() { } } +std::string output_t::symbol_case(const symbol_t &sym) { + if (sym.get_type() == SYMBOL_TYPE_CHAR) { + switch (sym.get_value()[0]) { + case '\a': + return "'\\a'"; + + case '\b': + return "'\\b'"; + + case '\f': + return "'\\f'"; + + case '\n': + return "'\\n'"; + + case '\r': + return "'\\r'"; + + case '\t': + return "'\\t'"; + + case '\v': + return "'\\v'"; + + case '\\': + return "'\\\\'"; + + case '\'': + return "'\\''"; + + default: + return "'" + sym.get_value() + "'"; + } + } + else { + if (sym.get_value().empty()) + return "0"; + else + return token_prefix_str + sym.get_value(); + } +} + void output_t::emit_tokens() { if (tokens.empty()) return; diff --git a/src/output.hpp b/src/output.hpp index 8c457dd..da5fb2f 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -57,6 +57,8 @@ protected: return token_prefix_str.c_str(); } + std::string symbol_case(const symbol_t &sym); + void emit_tokens(); void emit_token_value(); void emit_header(); diff --git a/src/output_lr0.cpp b/src/output_lr0.cpp index 86206a3..b821333 100644 --- a/src/output_lr0.cpp +++ b/src/output_lr0.cpp @@ -42,11 +42,7 @@ void output_lr0_t::emit_state_shift(unsigned i) { if (it == generator->get_shifts().end()) continue; - if (token.get_type() == SYMBOL_TYPE_CHAR) - std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]); - else - std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str()); - + std::fprintf(source_file, "\t\t\tcase %s:\n", symbol_case(token).c_str()); std::fprintf(source_file, "\t\t\t\tparser->stack[parser->top].value.token = *value;\n"); std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second)); std::fprintf(source_file, "\t\t\t\treturn 1;\n\n"); diff --git a/src/output_slr.cpp b/src/output_slr.cpp index ad0d415..371d55b 100644 --- a/src/output_slr.cpp +++ b/src/output_slr.cpp @@ -38,25 +38,14 @@ void output_slr_t::emit_state_shift(unsigned i, const symbol_t &token) { if (it == generator->get_shifts().end()) return; - if (token.get_type() == SYMBOL_TYPE_CHAR) - std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]); - else if (!token.get_value().empty()) - std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str()); - else - std::fprintf(source_file, "\t\t\tcase 0:\n"); - + std::fprintf(source_file, "\t\t\tcase %s:\n", symbol_case(token).c_str()); std::fprintf(source_file, "\t\t\t\tparser->stack[parser->top].value.token = *value;\n"); std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second)); std::fprintf(source_file, "\t\t\t\treturn 1;\n\n"); } void output_slr_t::emit_state_reduce(const item_t &item, const symbol_t &token, int rule_id) { - if (token.get_type() == SYMBOL_TYPE_CHAR) - std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]); - else if (!token.get_value().empty()) - std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str()); - else - std::fprintf(source_file, "\t\t\tcase 0:\n"); + std::fprintf(source_file, "\t\t\tcase %s:\n", symbol_case(token).c_str()); const auto &rhs = item.get_rhs(); if (rhs.size()) -- cgit v1.2.3