lex: change treatment of symbols which are neither all-uppercase nor all-lowercase

This commit is contained in:
Matthias Schiffer 2015-04-17 20:48:06 +02:00
parent dd5fa160c2
commit 9624984557
4 changed files with 110 additions and 169 deletions

View file

@ -241,17 +241,17 @@ int lex_t::lex_symbol(parse_token_value_t *value) {
if (needspace)
return syntax_error(value);
bool uc = true;
bool lc = true;
bool uc = false;
bool lc = false;
do {
switch (current()) {
case 'A' ... 'Z':
lc = false;
uc = true;
continue;
case 'a' ... 'z':
uc = false;
lc = true;
continue;
case '0' ... '9':
@ -264,10 +264,8 @@ int lex_t::lex_symbol(parse_token_value_t *value) {
value->str = get_token();
if (uc)
if (uc && !lc)
return TOK_SYMBOL_UC;
else if (lc)
return TOK_SYMBOL_LC;
else
return TOK_SYMBOL;
}
@ -420,6 +418,7 @@ int lex_t::lex(parse_token_value_t *value) {
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
return lex_symbol(value);
case '%':

View file

@ -141,8 +141,6 @@ static inline std::string * parse_reduce_23(std::string *v, __attribute__((unuse
static inline std::string * parse_reduce_24(std::string *v, __attribute__((unused)) solar::grammar_t *grammar) {return v;}
static inline std::string * parse_reduce_25(std::string *v, __attribute__((unused)) solar::grammar_t *grammar) {return v;}
static int parse_do_push(parse_context_t *parser, int token, __attribute__((unused)) solar::grammar_t *grammar) {
parse_symbol_value_t result;
@ -161,7 +159,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 0:
return 0;
case TOK_SYMBOL_LC:
case TOK_SYMBOL:
parser->stack[++parser->top].state = 4;
return 1;
@ -378,7 +376,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 30;
return 1;
case TOK_SYMBOL_LC:
case TOK_SYMBOL:
parser->stack[++parser->top].state = 31;
return 1;
@ -489,8 +487,8 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 25;
break;
case 69:
parser->stack[++parser->top].state = 71;
case 68:
parser->stack[++parser->top].state = 70;
break;
}
@ -515,7 +513,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[parser->top].value.symbol_term = result.symbol_term;
switch (parser->stack[parser->top].state) {
case 18:
case 69:
case 68:
parser->stack[++parser->top].state = 26;
break;
@ -561,8 +559,8 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 25;
break;
case 69:
parser->stack[++parser->top].state = 71;
case 68:
parser->stack[++parser->top].state = 70;
break;
}
@ -578,7 +576,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[parser->top].value.symbol_term = result.symbol_term;
switch (parser->stack[parser->top].state) {
case 18:
case 69:
case 68:
parser->stack[++parser->top].state = 26;
break;
@ -654,7 +652,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 32;
return 1;
case TOK_SYMBOL_LC:
case TOK_SYMBOL:
parser->stack[++parser->top].state = 45;
return 1;
@ -669,16 +667,12 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1;
case '=':
parser->stack[++parser->top].state = 50;
parser->stack[++parser->top].state = 49;
return 1;
default:
@ -689,7 +683,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 40:
switch (token) {
case 'u':
parser->stack[++parser->top].state = 51;
parser->stack[++parser->top].state = 50;
return 1;
default:
@ -700,7 +694,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 41:
switch (token) {
case '_':
parser->stack[++parser->top].state = 52;
parser->stack[++parser->top].state = 51;
return 1;
default:
@ -711,7 +705,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 42:
switch (token) {
case 'r':
parser->stack[++parser->top].state = 53;
parser->stack[++parser->top].state = 52;
return 1;
default:
@ -722,7 +716,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 43:
switch (token) {
case 'e':
parser->stack[++parser->top].state = 54;
parser->stack[++parser->top].state = 53;
return 1;
default:
@ -733,7 +727,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 44:
switch (token) {
case TOK_BLOCK:
parser->stack[++parser->top].state = 55;
parser->stack[++parser->top].state = 54;
return 1;
default:
@ -744,7 +738,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 45:
switch (token) {
case TOK_BLOCK:
parser->stack[++parser->top].state = 56;
parser->stack[++parser->top].state = 55;
return 1;
default:
@ -755,7 +749,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 46:
switch (token) {
case ')':
parser->stack[++parser->top].state = 57;
parser->stack[++parser->top].state = 56;
return 1;
default:
@ -767,29 +761,29 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
switch (token) {
default:
parser->top -= 1;
result.symbol_varname = parse_reduce_25(parser->stack[parser->top + 0].value.token.str, grammar);
result.symbol_varname = parse_reduce_24(parser->stack[parser->top + 0].value.token.str, grammar);
parser->stack[parser->top].value.symbol_varname = result.symbol_varname;
switch (parser->stack[parser->top].state) {
case 39:
parser->stack[++parser->top].state = 46;
break;
case 50:
parser->stack[++parser->top].state = 58;
case 49:
parser->stack[++parser->top].state = 57;
break;
case 55:
parser->stack[++parser->top].state = 63;
case 54:
parser->stack[++parser->top].state = 62;
break;
case 69:
parser->stack[++parser->top].state = 71;
break;
case 70:
parser->stack[++parser->top].state = 72;
break;
case 71:
parser->stack[++parser->top].state = 73;
break;
}
}
break;
@ -805,69 +799,45 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 46;
break;
case 50:
parser->stack[++parser->top].state = 58;
case 49:
parser->stack[++parser->top].state = 57;
break;
case 55:
parser->stack[++parser->top].state = 63;
case 54:
parser->stack[++parser->top].state = 62;
break;
case 69:
parser->stack[++parser->top].state = 71;
break;
case 70:
parser->stack[++parser->top].state = 72;
break;
case 71:
parser->stack[++parser->top].state = 73;
break;
}
}
break;
case 49:
switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 48;
return 1;
default:
parser->top -= 1;
result.symbol_varname = parse_reduce_24(parser->stack[parser->top + 0].value.token.str, grammar);
parser->stack[parser->top].value.symbol_varname = result.symbol_varname;
switch (parser->stack[parser->top].state) {
case 39:
parser->stack[++parser->top].state = 46;
break;
case 50:
parser->stack[++parser->top].state = 58;
break;
case 55:
parser->stack[++parser->top].state = 63;
break;
case 70:
parser->stack[++parser->top].state = 72;
break;
case 71:
parser->stack[++parser->top].state = 73;
break;
}
return -1;
}
break;
case 50:
switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
case 'c':
parser->stack[++parser->top].state = 58;
return 1;
default:
@ -877,7 +847,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 51:
switch (token) {
case 'c':
case 'a':
parser->stack[++parser->top].state = 59;
return 1;
@ -888,7 +858,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 52:
switch (token) {
case 'a':
case TOK_BLOCK:
parser->stack[++parser->top].state = 60;
return 1;
@ -910,8 +880,12 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 54:
switch (token) {
case TOK_BLOCK:
parser->stack[++parser->top].state = 62;
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 48;
return 1;
default:
@ -920,25 +894,6 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
break;
case 55:
switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1;
default:
return -1;
}
break;
case 56:
switch (token) {
default:
parser->top -= 7;
@ -949,7 +904,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 57:
case 56:
switch (token) {
default:
parser->top -= 5;
@ -961,9 +916,20 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 58:
case 57:
switch (token) {
case ')':
parser->stack[++parser->top].state = 63;
return 1;
default:
return -1;
}
break;
case 58:
switch (token) {
case 't':
parser->stack[++parser->top].state = 64;
return 1;
@ -974,7 +940,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 59:
switch (token) {
case 't':
case 'r':
parser->stack[++parser->top].state = 65;
return 1;
@ -984,17 +950,6 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
break;
case 60:
switch (token) {
case 'r':
parser->stack[++parser->top].state = 66;
return 1;
default:
return -1;
}
break;
case 61:
switch (token) {
default:
parser->top -= 8;
@ -1004,7 +959,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 62:
case 61:
switch (token) {
default:
parser->top -= 8;
@ -1014,7 +969,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 63:
case 62:
switch (token) {
default:
parser->top -= 8;
@ -1026,7 +981,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 64:
case 63:
switch (token) {
default:
parser->top -= 6;
@ -1038,9 +993,20 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 65:
case 64:
switch (token) {
case 'o':
parser->stack[++parser->top].state = 66;
return 1;
default:
return -1;
}
break;
case 65:
switch (token) {
case 'g':
parser->stack[++parser->top].state = 67;
return 1;
@ -1051,7 +1017,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 66:
switch (token) {
case 'g':
case 'r':
parser->stack[++parser->top].state = 68;
return 1;
@ -1062,7 +1028,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 67:
switch (token) {
case 'r':
case TOK_BLOCK:
parser->stack[++parser->top].state = 69;
return 1;
@ -1073,8 +1039,16 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 68:
switch (token) {
case TOK_BLOCK:
parser->stack[++parser->top].state = 70;
case TOK_CHAR:
parser->stack[++parser->top].state = 28;
return 1;
case TOK_SYMBOL:
parser->stack[++parser->top].state = 31;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 32;
return 1;
default:
@ -1084,16 +1058,12 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
case 69:
switch (token) {
case TOK_CHAR:
parser->stack[++parser->top].state = 28;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 31;
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 32;
parser->stack[++parser->top].state = 48;
return 1;
default:
@ -1107,12 +1077,8 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
parser->stack[++parser->top].state = 48;
return 1;
default:
@ -1121,25 +1087,6 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
break;
case 71:
switch (token) {
case TOK_SYMBOL:
parser->stack[++parser->top].state = 47;
return 1;
case TOK_SYMBOL_LC:
parser->stack[++parser->top].state = 48;
return 1;
case TOK_SYMBOL_UC:
parser->stack[++parser->top].state = 49;
return 1;
default:
return -1;
}
break;
case 72:
switch (token) {
default:
parser->top -= 12;
@ -1150,7 +1097,7 @@ static int parse_do_push(parse_context_t *parser, int token, __attribute__((unus
}
break;
case 73:
case 72:
switch (token) {
default:
parser->top -= 13;

View file

@ -10,8 +10,7 @@ typedef enum parse_token {
TOK_SQBLOCK = 258,
TOK_STRING = 259,
TOK_SYMBOL = 260,
TOK_SYMBOL_LC = 261,
TOK_SYMBOL_UC = 262,
TOK_SYMBOL_UC = 261,
} parse_token_t;
typedef struct parse_token_value {

View file

@ -31,9 +31,6 @@ static inline void free_rhs(rhs_t *v) {
%type SYMBOL_UC {std::string *} str
%destructor SYMBOL_UC free_string
%type SYMBOL_LC {std::string *} str
%destructor SYMBOL_LC free_string
%type BLOCK {std::string *} str
%destructor BLOCK free_string
@ -72,7 +69,7 @@ grammar |=;
grammar |= grammar directive;
directive |= "%type" SYMBOL_LC(nonterm) BLOCK(type) {
directive |= "%type" SYMBOL(nonterm) BLOCK(type) {
grammar->nonterm_types.insert(std::make_pair(*nonterm, *type));
}
@ -107,7 +104,7 @@ directive |= rule(rule) {
}
rule |= SYMBOL_LC(lhs) "|=" rhs(rhs) action(action)
rule |= SYMBOL(lhs) "|=" rhs(rhs) action(action)
[new solar::rule_t {solar::item_t(*lhs, rhs->first), rhs->second, *action}]
@ -149,12 +146,11 @@ action |= BLOCK(=v) [v]
action |= SQBLOCK(v) [new std::string("return " + *v + ";")]
symbol |= SYMBOL_LC(v) [new solar::symbol_t(solar::symbol_t::make_nonterm(*v))]
symbol |= SYMBOL(v) [new solar::symbol_t(solar::symbol_t::make_nonterm(*v))]
symbol |= term(=v) [v]
term |= SYMBOL_UC(v) [new solar::symbol_t(solar::symbol_t::make_term(*v))]
term |= CHAR(v) [new solar::symbol_t(solar::symbol_t::make_char(v))]
varname |= SYMBOL_LC(=v) [v]
varname |= SYMBOL_UC(=v) [v]
varname |= SYMBOL(=v) [v]