summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Matthias Schiffer <mschiffer@universe-factory.net> 2015-04-10 19:00:43 +0200
committer Matthias Schiffer <mschiffer@universe-factory.net> 2015-04-10 19:10:17 +0200
commit aeaeba2d7362a055c5c78f410a743ea977d8cd36 (patch)
tree eceab3dcaf74b3d224aaab9481c0f051bed10c9c
parent 475326bf1295f1985b1853d04ca036156bcad889 (diff)
download solar-aeaeba2d7362a055c5c78f410a743ea977d8cd36.tar
solar-aeaeba2d7362a055c5c78f410a743ea977d8cd36.zip
Add support for escape sequences in character symbols
-rw-r--r-- src/lex.cpp 61
-rw-r--r-- src/output.cpp 42
-rw-r--r-- src/output.hpp 2
-rw-r--r-- src/output_lr0.cpp 6
-rw-r--r-- src/output_slr.cpp 15
5 files changed, 103 insertions, 23 deletions
diff --git a/src/lex.cpp b/src/lex.cpp
index 5064dd7..3058232 100644
--- a/src/lex.cpp
+++ b/src/lex.cpp
@@ -337,13 +337,67 @@ int lex_t::lex(parse_token_value_t *value) {
if (!next(true))
return syntax_error(value);
- value->c = current();
+ if (current() == '\\') {
+ if (!next(true))
+ return syntax_error(value);
+
+ switch (current()) {
+ case 'a':
+ value->c = '\a';
+ break;
+
+ case 'b':
+ value->c = '\b';
+ break;
+
+ case 'f':
+ value->c = '\f';
+ break;
+
+ case 'n':
+ value->c = '\n';
+ break;
+
+ case 'r':
+ value->c = '\r';
+ break;
+
+ case 't':
+ value->c = '\t';
+ break;
+
+ case 'v':
+ value->c = '\v';
+ break;
+
+ case '\\':
+ value->c = '\\';
+ break;
+
+ case '\'':
+ value->c = '\'';
+ break;
+
+ case '"':
+ value->c = '"';
+ break;
+
+ case '?':
+ value->c = '?';
+ break;
+
+ default:
+ return syntax_error(value);
+ }
+ }
+ else {
+ value->c = current();
+ }
if (!next(true) || current() != '\'')
return syntax_error(value);
next(true);
-
consume(false);
return TOK_CHAR;
@@ -354,9 +408,6 @@ int lex_t::lex(parse_token_value_t *value) {
case '"':
return lex_string(value);
- //case '0' ... '9':
- //return lex_number(value);
-
case 'a' ... 'z':
case 'A' ... 'Z':
return lex_symbol(value);
diff --git a/src/output.cpp b/src/output.cpp
index 7f6e5c8..9c30443 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -64,6 +64,48 @@ void output_t::initialize() {
}
}
+std::string output_t::symbol_case(const symbol_t &sym) {
+ if (sym.get_type() == SYMBOL_TYPE_CHAR) {
+ switch (sym.get_value()[0]) {
+ case '\a':
+ return "'\\a'";
+
+ case '\b':
+ return "'\\b'";
+
+ case '\f':
+ return "'\\f'";
+
+ case '\n':
+ return "'\\n'";
+
+ case '\r':
+ return "'\\r'";
+
+ case '\t':
+ return "'\\t'";
+
+ case '\v':
+ return "'\\v'";
+
+ case '\\':
+ return "'\\\\'";
+
+ case '\'':
+ return "'\\''";
+
+ default:
+ return "'" + sym.get_value() + "'";
+ }
+ }
+ else {
+ if (sym.get_value().empty())
+ return "0";
+ else
+ return token_prefix_str + sym.get_value();
+ }
+}
+
void output_t::emit_tokens() {
if (tokens.empty())
return;
diff --git a/src/output.hpp b/src/output.hpp
index 8c457dd..da5fb2f 100644
--- a/src/output.hpp
+++ b/src/output.hpp
@@ -57,6 +57,8 @@ protected:
return token_prefix_str.c_str();
}
+ std::string symbol_case(const symbol_t &sym);
+
void emit_tokens();
void emit_token_value();
void emit_header();
diff --git a/src/output_lr0.cpp b/src/output_lr0.cpp
index 86206a3..b821333 100644
--- a/src/output_lr0.cpp
+++ b/src/output_lr0.cpp
@@ -42,11 +42,7 @@ void output_lr0_t::emit_state_shift(unsigned i) {
if (it == generator->get_shifts().end())
continue;
- if (token.get_type() == SYMBOL_TYPE_CHAR)
- std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]);
- else
- std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str());
-
+ std::fprintf(source_file, "\t\t\tcase %s:\n", symbol_case(token).c_str());
std::fprintf(source_file, "\t\t\t\tparser->stack[parser->top].value.token = *value;\n");
std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second));
std::fprintf(source_file, "\t\t\t\treturn 1;\n\n");
diff --git a/src/output_slr.cpp b/src/output_slr.cpp
index ad0d415..371d55b 100644
--- a/src/output_slr.cpp
+++ b/src/output_slr.cpp
@@ -38,25 +38,14 @@ void output_slr_t::emit_state_shift(unsigned i, const symbol_t &token) {
if (it == generator->get_shifts().end())
return;
- if (token.get_type() == SYMBOL_TYPE_CHAR)
- std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]);
- else if (!token.get_value().empty())
- std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str());
- else
- std::fprintf(source_file, "\t\t\tcase 0:\n");
-
+ std::fprintf(source_file, "\t\t\tcase %s:\n", symbol_case(token).c_str());
std::fprintf(source_file, "\t\t\t\tparser->stack[parser->top].value.token = *value;\n");
std::fprintf(source_file, "\t\t\t\tparser->stack[++parser->top].state = %u;\n", unsigned(it->second));
std::fprintf(source_file, "\t\t\t\treturn 1;\n\n");
}
void output_slr_t::emit_state_reduce(const item_t &item, const symbol_t &token, int rule_id) {
- if (token.get_type() == SYMBOL_TYPE_CHAR)
- std::fprintf(source_file, "\t\t\tcase '%c':\n", token.get_value()[0]);
- else if (!token.get_value().empty())
- std::fprintf(source_file, "\t\t\tcase %s%s:\n", token_prefix(), token.get_value().c_str());
- else
- std::fprintf(source_file, "\t\t\tcase 0:\n");
+ std::fprintf(source_file, "\t\t\tcase %s:\n", symbol_case(token).c_str());
const auto &rhs = item.get_rhs();
if (rhs.size())