lex: change treatment of symbols which are neither all-uppercase nor all-lowercase

This commit is contained in:
Matthias Schiffer 2015-04-17 20:48:06 +02:00
parent dd5fa160c2
commit 9624984557
4 changed files with 110 additions and 169 deletions

View file

@ -241,17 +241,17 @@ int lex_t::lex_symbol(parse_token_value_t *value) {
if (needspace) if (needspace)
return syntax_error(value); return syntax_error(value);
bool uc = true; bool uc = false;
bool lc = true; bool lc = false;
do { do {
switch (current()) { switch (current()) {
case 'A' ... 'Z': case 'A' ... 'Z':
lc = false; uc = true;
continue; continue;
case 'a' ... 'z': case 'a' ... 'z':
uc = false; lc = true;
continue; continue;
case '0' ... '9': case '0' ... '9':
@ -264,10 +264,8 @@ int lex_t::lex_symbol(parse_token_value_t *value) {
value->str = get_token(); value->str = get_token();
if (uc) if (uc && !lc)
return TOK_SYMBOL_UC; return TOK_SYMBOL_UC;
else if (lc)
return TOK_SYMBOL_LC;
else else
return TOK_SYMBOL; return TOK_SYMBOL;
} }
@ -420,6 +418,7 @@ int lex_t::lex(parse_token_value_t *value) {
case 'a' ... 'z': case 'a' ... 'z':
case 'A' ... 'Z': case 'A' ... 'Z':
case '_':
return lex_symbol(value); return lex_symbol(value);
case '%': case '%':

View file

@ -141,8 +141,6 @@ static inline std::string * parse_reduce_23(std::string *v, __attribute__((unuse
static inline std::string * parse_reduce_24(std::string *v, __attribute__((unused)) solar::grammar_t *grammar) {return v;} static inline std::string * parse_reduce_24(std::string *v, __attribute__((unused)) solar::grammar_t *grammar) {return v;}
static inline std::string * parse_reduce_25(std::string *v, __attribute__((unused)) solar::grammar_t *grammar) {return v;}
static int parse_do_push(parse_context_t *parser, int token, __attribute__((unused)) solar::grammar_t *grammar) { static int parse_do_push(parse_context_t *parser, int token, __attribute__((unused)) solar::grammar_t *grammar) {
parse_symbol_value_t result; parse_symbol_value_t result;
@ -161,7 +159,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 0: case 0:
return 0; return 0;
case TOK_SYMBOL_LC: case TOK_SYMBOL:
parser->stack[++parser->top].state = 4; parser->stack[++parser->top].state = 4;
return 1; return 1;
@ -378,7 +376,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 30; parser->stack[++parser->top].state = 30;
return 1; return 1;
case TOK_SYMBOL_LC: case TOK_SYMBOL:
parser->stack[++parser->top].state = 31; parser->stack[++parser->top].state = 31;
return 1; return 1;
@ -489,8 +487,8 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 25; parser->stack[++parser->top].state = 25;
break; break;
case 69: case 68:
parser->stack[++parser->top].state = 71; parser->stack[++parser->top].state = 70;
break; break;
} }
@ -515,7 +513,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[parser->top].value.symbol_term = result.symbol_term; parser->stack[parser->top].value.symbol_term = result.symbol_term;
switch (parser->stack[parser->top].state) { switch (parser->stack[parser->top].state) {
case 18: case 18:
case 69: case 68:
parser->stack[++parser->top].state = 26; parser->stack[++parser->top].state = 26;
break; break;
@ -561,8 +559,8 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 25; parser->stack[++parser->top].state = 25;
break; break;
case 69: case 68:
parser->stack[++parser->top].state = 71; parser->stack[++parser->top].state = 70;
break; break;
} }
@ -578,7 +576,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[parser->top].value.symbol_term = result.symbol_term; parser->stack[parser->top].value.symbol_term = result.symbol_term;
switch (parser->stack[parser->top].state) { switch (parser->stack[parser->top].state) {
case 18: case 18:
case 69: case 68:
parser->stack[++parser->top].state = 26; parser->stack[++parser->top].state = 26;
break; break;
@ -654,7 +652,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 32; parser->stack[++parser->top].state = 32;
return 1; return 1;
case TOK_SYMBOL_LC: case TOK_SYMBOL:
parser->stack[++parser->top].state = 45; parser->stack[++parser->top].state = 45;
return 1; return 1;
@ -669,16 +667,12 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 47; parser->stack[++parser->top].state = 47;
return 1; return 1;
case TOK_SYMBOL_LC: case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 48; parser->stack[++parser->top].state = 48;
return 1; return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1;
case '=': case '=':
parser->stack[++parser->top].state = 50; parser->stack[++parser->top].state = 49;
return 1; return 1;
default: default:
@ -689,7 +683,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 40: case 40:
switch (token) { switch (token) {
case 'u': case 'u':
parser->stack[++parser->top].state = 51; parser->stack[++parser->top].state = 50;
return 1; return 1;
default: default:
@ -700,7 +694,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 41: case 41:
switch (token) { switch (token) {
case '_': case '_':
parser->stack[++parser->top].state = 52; parser->stack[++parser->top].state = 51;
return 1; return 1;
default: default:
@ -711,7 +705,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 42: case 42:
switch (token) { switch (token) {
case 'r': case 'r':
parser->stack[++parser->top].state = 53; parser->stack[++parser->top].state = 52;
return 1; return 1;
default: default:
@ -722,7 +716,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 43: case 43:
switch (token) { switch (token) {
case 'e': case 'e':
parser->stack[++parser->top].state = 54; parser->stack[++parser->top].state = 53;
return 1; return 1;
default: default:
@ -733,7 +727,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 44: case 44:
switch (token) { switch (token) {
case TOK_BLOCK: case TOK_BLOCK:
parser->stack[++parser->top].state = 55; parser->stack[++parser->top].state = 54;
return 1; return 1;
default: default:
@ -744,7 +738,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 45: case 45:
switch (token) { switch (token) {
case TOK_BLOCK: case TOK_BLOCK:
parser->stack[++parser->top].state = 56; parser->stack[++parser->top].state = 55;
return 1; return 1;
default: default:
@ -755,7 +749,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 46: case 46:
switch (token) { switch (token) {
case ')': case ')':
parser->stack[++parser->top].state = 57; parser->stack[++parser->top].state = 56;
return 1; return 1;
default: default:
@ -767,29 +761,29 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
switch (token) { switch (token) {
default: default:
parser->top -= 1; parser->top -= 1;
result.symbol_varname = parse_reduce_25(parser->stack[parser->top + 0].value.token.str, grammar); result.symbol_varname = parse_reduce_24(parser->stack[parser->top + 0].value.token.str, grammar);
parser->stack[parser->top].value.symbol_varname = result.symbol_varname; parser->stack[parser->top].value.symbol_varname = result.symbol_varname;
switch (parser->stack[parser->top].state) { switch (parser->stack[parser->top].state) {
case 39: case 39:
parser->stack[++parser->top].state = 46; parser->stack[++parser->top].state = 46;
break; break;
case 50: case 49:
parser->stack[++parser->top].state = 58; parser->stack[++parser->top].state = 57;
break; break;
case 55: case 54:
parser->stack[++parser->top].state = 63; parser->stack[++parser->top].state = 62;
break;
case 69:
parser->stack[++parser->top].state = 71;
break; break;
case 70: case 70:
parser->stack[++parser->top].state = 72; parser->stack[++parser->top].state = 72;
break; break;
case 71:
parser->stack[++parser->top].state = 73;
break;
} }
} }
break; break;
@ -805,69 +799,45 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 46; parser->stack[++parser->top].state = 46;
break; break;
case 50: case 49:
parser->stack[++parser->top].state = 58; parser->stack[++parser->top].state = 57;
break; break;
case 55: case 54:
parser->stack[++parser->top].state = 63; parser->stack[++parser->top].state = 62;
break;
case 69:
parser->stack[++parser->top].state = 71;
break; break;
case 70: case 70:
parser->stack[++parser->top].state = 72; parser->stack[++parser->top].state = 72;
break; break;
case 71:
parser->stack[++parser->top].state = 73;
break;
} }
} }
break; break;
case 49: case 49:
switch (token) { switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 48;
return 1;
default: default:
parser->top -= 1; return -1;
result.symbol_varname = parse_reduce_24(parser->stack[parser->top + 0].value.token.str, grammar);
parser->stack[parser->top].value.symbol_varname = result.symbol_varname;
switch (parser->stack[parser->top].state) {
case 39:
parser->stack[++parser->top].state = 46;
break;
case 50:
parser->stack[++parser->top].state = 58;
break;
case 55:
parser->stack[++parser->top].state = 63;
break;
case 70:
parser->stack[++parser->top].state = 72;
break;
case 71:
parser->stack[++parser->top].state = 73;
break;
}
} }
break; break;
case 50: case 50:
switch (token) { switch (token) {
case TOK_SYMBOL: case 'c':
parser->stack[++parser->top].state = 47; parser->stack[++parser->top].state = 58;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1; return 1;
default: default:
@ -877,7 +847,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 51: case 51:
switch (token) { switch (token) {
case 'c': case 'a':
parser->stack[++parser->top].state = 59; parser->stack[++parser->top].state = 59;
return 1; return 1;
@ -888,7 +858,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 52: case 52:
switch (token) { switch (token) {
case 'a': case TOK_BLOCK:
parser->stack[++parser->top].state = 60; parser->stack[++parser->top].state = 60;
return 1; return 1;
@ -910,8 +880,12 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 54: case 54:
switch (token) { switch (token) {
case TOK_BLOCK: case TOK_SYMBOL:
parser->stack[++parser->top].state = 62; parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 48;
return 1; return 1;
default: default:
@ -920,25 +894,6 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
break; break;
case 55: case 55:
switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1;
default:
return -1;
}
break;
case 56:
switch (token) { switch (token) {
default: default:
parser->top -= 7; parser->top -= 7;
@ -949,7 +904,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 57: case 56:
switch (token) { switch (token) {
default: default:
parser->top -= 5; parser->top -= 5;
@ -961,9 +916,20 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 58: case 57:
switch (token) { switch (token) {
case ')': case ')':
parser->stack[++parser->top].state = 63;
return 1;
default:
return -1;
}
break;
case 58:
switch (token) {
case 't':
parser->stack[++parser->top].state = 64; parser->stack[++parser->top].state = 64;
return 1; return 1;
@ -974,7 +940,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 59: case 59:
switch (token) { switch (token) {
case 't': case 'r':
parser->stack[++parser->top].state = 65; parser->stack[++parser->top].state = 65;
return 1; return 1;
@ -984,17 +950,6 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
break; break;
case 60: case 60:
switch (token) {
case 'r':
parser->stack[++parser->top].state = 66;
return 1;
default:
return -1;
}
break;
case 61:
switch (token) { switch (token) {
default: default:
parser->top -= 8; parser->top -= 8;
@ -1004,7 +959,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 62: case 61:
switch (token) { switch (token) {
default: default:
parser->top -= 8; parser->top -= 8;
@ -1014,7 +969,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 63: case 62:
switch (token) { switch (token) {
default: default:
parser->top -= 8; parser->top -= 8;
@ -1026,7 +981,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 64: case 63:
switch (token) { switch (token) {
default: default:
parser->top -= 6; parser->top -= 6;
@ -1038,9 +993,20 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 65: case 64:
switch (token) { switch (token) {
case 'o': case 'o':
parser->stack[++parser->top].state = 66;
return 1;
default:
return -1;
}
break;
case 65:
switch (token) {
case 'g':
parser->stack[++parser->top].state = 67; parser->stack[++parser->top].state = 67;
return 1; return 1;
@ -1051,7 +1017,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 66: case 66:
switch (token) { switch (token) {
case 'g': case 'r':
parser->stack[++parser->top].state = 68; parser->stack[++parser->top].state = 68;
return 1; return 1;
@ -1062,7 +1028,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 67: case 67:
switch (token) { switch (token) {
case 'r': case TOK_BLOCK:
parser->stack[++parser->top].state = 69; parser->stack[++parser->top].state = 69;
return 1; return 1;
@ -1073,8 +1039,16 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 68: case 68:
switch (token) { switch (token) {
case TOK_BLOCK: case TOK_CHAR:
parser->stack[++parser->top].state = 70; parser->stack[++parser->top].state = 28;
return 1;
case TOK_SYMBOL:
parser->stack[++parser->top].state = 31;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 32;
return 1; return 1;
default: default:
@ -1084,16 +1058,12 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 69: case 69:
switch (token) { switch (token) {
case TOK_CHAR: case TOK_SYMBOL:
parser->stack[++parser->top].state = 28; parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 31;
return 1; return 1;
case TOK_SYMBOL_UC: case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 32; parser->stack[++parser->top].state = 48;
return 1; return 1;
default: default:
@ -1107,12 +1077,8 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 47; parser->stack[++parser->top].state = 47;
return 1; return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC: case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49; parser->stack[++parser->top].state = 48;
return 1; return 1;
default: default:
@ -1121,25 +1087,6 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
break; break;
case 71: case 71:
switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1;
default:
return -1;
}
break;
case 72:
switch (token) { switch (token) {
default: default:
parser->top -= 12; parser->top -= 12;
@ -1150,7 +1097,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
} }
break; break;
case 73: case 72:
switch (token) { switch (token) {
default: default:
parser->top -= 13; parser->top -= 13;

View file

@ -10,8 +10,7 @@ typedef enum parse_token {
TOK_SQBLOCK = 258, TOK_SQBLOCK = 258,
TOK_STRING = 259, TOK_STRING = 259,
TOK_SYMBOL = 260, TOK_SYMBOL = 260,
TOK_SYMBOL_LC = 261, TOK_SYMBOL_UC = 261,
TOK_SYMBOL_UC = 262,
} parse_token_t; } parse_token_t;
typedef struct parse_token_value { typedef struct parse_token_value {

View file

@ -31,9 +31,6 @@ static inline void free_rhs(rhs_t *v) {
%type SYMBOL_UC {std::string *} str %type SYMBOL_UC {std::string *} str
%destructor SYMBOL_UC free_string %destructor SYMBOL_UC free_string
%type SYMBOL_LC {std::string *} str
%destructor SYMBOL_LC free_string
%type BLOCK {std::string *} str %type BLOCK {std::string *} str
%destructor BLOCK free_string %destructor BLOCK free_string
@ -72,7 +69,7 @@ grammar |=;
grammar |= grammar directive; grammar |= grammar directive;
directive |= "%type" SYMBOL_LC(nonterm) BLOCK(type) { directive |= "%type" SYMBOL(nonterm) BLOCK(type) {
grammar->nonterm_types.insert(std::make_pair(*nonterm, *type)); grammar->nonterm_types.insert(std::make_pair(*nonterm, *type));
} }
@ -107,7 +104,7 @@ directive |= rule(rule) {
} }
rule |= SYMBOL_LC(lhs) "|=" rhs(rhs) action(action) rule |= SYMBOL(lhs) "|=" rhs(rhs) action(action)
[new solar::rule_t {solar::item_t(*lhs, rhs->first), rhs->second, *action}] [new solar::rule_t {solar::item_t(*lhs, rhs->first), rhs->second, *action}]
@ -149,12 +146,11 @@ action |= BLOCK(=v) [v]
action |= SQBLOCK(v) [new std::string("return " + *v + ";")] action |= SQBLOCK(v) [new std::string("return " + *v + ";")]
symbol |= SYMBOL_LC(v) [new solar::symbol_t(solar::symbol_t::make_nonterm(*v))] symbol |= SYMBOL(v) [new solar::symbol_t(solar::symbol_t::make_nonterm(*v))]
symbol |= term(=v) [v] symbol |= term(=v) [v]
term |= SYMBOL_UC(v) [new solar::symbol_t(solar::symbol_t::make_term(*v))] term |= SYMBOL_UC(v) [new solar::symbol_t(solar::symbol_t::make_term(*v))]
term |= CHAR(v) [new solar::symbol_t(solar::symbol_t::make_char(v))] term |= CHAR(v) [new solar::symbol_t(solar::symbol_t::make_char(v))]
varname |= SYMBOL_LC(=v) [v]
varname |= SYMBOL_UC(=v) [v] varname |= SYMBOL_UC(=v) [v]
varname |= SYMBOL(=v) [v] varname |= SYMBOL(=v) [v]