Don't parse keywords in the lexer
parent 650fff74de
commit 634e5db209
5 changed files with 1076 additions and 1210 deletions
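In rough terms, this commit moves keyword recognition out of the lexer and into the grammar: the sorted keyword table, compare_keywords() and lex_keyword() are deleted, the lexer instead switches into a "dumb mode" when it sees '%' and returns each following character as its own token, and parse.y now spells the directives out as string literals ("%type", "%source", "%header", "%extra_arg"). The sketch below only illustrates that division of labour under those assumptions; it is not the project's lex_t, and the names toy_lexer, match_literal and TOY_SYMBOL are made up for the example.

#include <cctype>
#include <iostream>
#include <string>

// Token kinds for the toy example; single characters are returned as their
// own character code, much like the char-valued tokens in the diff below.
enum { TOY_EOF = -1, TOY_SYMBOL = 256 };

// Toy lexer illustrating the "dumb mode" idea: normally it groups identifier
// characters into one TOY_SYMBOL token, but after a '%' it flips into dumb
// mode and emits one raw character per token until the next whitespace,
// leaving keyword recognition to the consumer of the tokens.
struct toy_lexer {
    std::string input;
    std::string symbol;     // text of the last TOY_SYMBOL
    size_t pos = 0;
    bool dumb_mode = false;

    int next_token() {
        while (pos < input.size() && std::isspace(static_cast<unsigned char>(input[pos]))) {
            ++pos;
            dumb_mode = false;              // whitespace ends the raw-character run
        }
        if (pos >= input.size())
            return TOY_EOF;
        char c = input[pos];
        if (c == '%') {
            dumb_mode = true;               // start of a directive
            ++pos;
            return static_cast<unsigned char>(c);
        }
        if (dumb_mode || !std::isalnum(static_cast<unsigned char>(c))) {
            ++pos;                          // raw character token
            return static_cast<unsigned char>(c);
        }
        symbol.clear();                     // normal mode: collect a whole symbol
        while (pos < input.size() && std::isalnum(static_cast<unsigned char>(input[pos])))
            symbol += input[pos++];
        return TOY_SYMBOL;
    }
};

// Consumer-side check that the token stream spells out a literal such as
// "%type"; a generated parser would do the same thing through its tables.
bool match_literal(toy_lexer &lx, const char *lit) {
    for (const char *p = lit; *p; ++p)
        if (lx.next_token() != static_cast<unsigned char>(*p))
            return false;
    return true;
}

int main() {
    toy_lexer lx{"%type expr"};
    if (match_literal(lx, "%type") && lx.next_token() == TOY_SYMBOL)
        std::cout << "directive %type applies to symbol '" << lx.symbol << "'\n";
}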
src/lex.cpp (92 lines changed)
@@ -34,26 +34,6 @@
 namespace solar {
 
-
-struct keyword_t {
-    const char *keyword;
-    int token;
-};
-
-/* the keyword list must be sorted */
-static const keyword_t keywords[] = {
-    {"%extra_arg", TOK_EXTRA_ARG},
-    {"%header", TOK_HEADER},
-    {"%source", TOK_SOURCE},
-    {"%type", TOK_TYPE},
-};
-
-static int compare_keywords(const void *v1, const void *v2) {
-    const keyword_t *k1 = static_cast<const keyword_t*>(v1), *k2 = static_cast<const keyword_t*>(v2);
-    return std::strcmp(k1->keyword, k2->keyword);
-}
-
 bool lex_t::advance() {
     if (start > 0) {
         std::memmove(buffer, buffer+start, end - start);
@@ -183,61 +163,6 @@ int lex_t::lex_string(parse_token_value_t *value) {
     return TOK_STRING;
 }
 
-/*
-int lex_t::lex_number(parse_token_value_t *value) {
-    if (needspace)
-        return syntax_error(value);
-
-    while (next(false)) {
-        char cur = current();
-
-        if (cur >= '0' && cur <= '9')
-            continue;
-
-        if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F'))
-            continue;
-
-        break;
-    }
-
-    char *endptr, *token = get_token();
-    value->number = std::strtoull(token, &endptr, 0);
-
-    bool ok = !*endptr;
-    free(token);
-
-    if (!ok)
-        return syntax_error(value);
-
-    consume(true);
-
-    return TOK_UINT;
-}*/
-
-int lex_t::lex_keyword(parse_token_value_t *value) {
-    if (needspace)
-        return syntax_error(value);
-
-    while (next(false)) {
-        char cur = current();
-
-        if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '_'))
-            break;
-    }
-
-    std::string *token = get_token();
-    const keyword_t key = { .keyword = token->c_str(), .token = 0 };
-    const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
-    delete token;
-
-    if (!ret)
-        return syntax_error(value);
-
-    consume(true);
-
-    return ret->token;
-}
-
 int lex_t::unterminated_block(parse_token_value_t *value) {
     if (ferror(file))
         return io_error(value);
@@ -366,8 +291,18 @@ int lex_t::lex(parse_token_value_t *value) {
         case '\r':
             next(true);
             consume(false);
+            dumb_mode = false;
             continue;
         }
 
+        if (dumb_mode) {
+            token = current();
+            next(true);
+            consume(false);
+            return token;
+        }
+
         switch (current()) {
         case ';':
         case ':':
         case '|':
@@ -432,7 +367,12 @@ int lex_t::lex(parse_token_value_t *value) {
             return lex_symbol(value);
 
         case '%':
-            return lex_keyword(value);
+            dumb_mode = true;
+
+            token = current();
+            next(true);
+            consume(false);
+            return token;
 
         default:
             return syntax_error(value);
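In the new scheme a directive such as %type never reaches the parser as a single keyword token: lex() returns the '%' character itself, sets dumb_mode, and then hands back each following character as its own token until the whitespace handling earlier in the function (the case '\r' branch shown above, and presumably its sibling whitespace cases) clears dumb_mode again. Keyword recognition therefore moves entirely into the grammar, which now writes the directives as string literals; see the parse.y hunks below.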
(lexer header; per-file heading not shown on this page)
@@ -51,6 +51,7 @@ private:
     std::FILE *file;
 
     bool needspace;
+    bool dumb_mode;
 
     size_t start;
     size_t end;
@@ -69,8 +70,6 @@ private:
     int unterminated_string(parse_token_value_t *value);
 
     int lex_string(parse_token_value_t *value);
-    //int lex_number(parse_token_value_t *value);
-    int lex_keyword(parse_token_value_t *value);
     int lex_block(parse_token_value_t *value);
     int lex_symbol(parse_token_value_t *value);
 
src/parse.cpp (2167 lines changed)
File diff suppressed because it is too large
(parser token header; per-file heading not shown on this page)
@@ -6,14 +6,10 @@
 enum parse_token_t {
     TOK_BLOCK = 256,
     TOK_CHAR = 257,
-    TOK_EXTRA_ARG = 258,
-    TOK_HEADER = 259,
-    TOK_SOURCE = 260,
-    TOK_STRING = 261,
-    TOK_SYMBOL = 262,
-    TOK_SYMBOL_LC = 263,
-    TOK_SYMBOL_UC = 264,
-    TOK_TYPE = 265,
+    TOK_STRING = 258,
+    TOK_SYMBOL = 259,
+    TOK_SYMBOL_LC = 260,
+    TOK_SYMBOL_UC = 261,
 };
 
 typedef struct parse_token_value {
src/parse.y (10 lines changed)
@@ -34,14 +34,14 @@ directive |= rule(rule) {
     delete rule;
 }
 
-directive |= TYPE SYMBOL_LC(nonterm) BLOCK(type) {
+directive |= "%type" SYMBOL_LC(nonterm) BLOCK(type) {
     grammar->nonterm_types.insert(std::make_pair(*nonterm, *type));
 
     delete nonterm;
     delete type;
 }
 
-directive |= TYPE term(term) BLOCK(type) varname(name) {
+directive |= "%type" term(term) BLOCK(type) varname(name) {
     grammar->term_types.insert(std::make_pair(*term, std::make_pair(*type, *name)));
 
     delete term;
@@ -49,17 +49,17 @@ directive |= TYPE term(term) BLOCK(type) varname(name) {
     delete name;
 }
 
-directive |= SOURCE BLOCK(block) {
+directive |= "%source" BLOCK(block) {
     grammar->source_block = *block;
     delete block;
 }
 
-directive |= HEADER BLOCK(block) {
+directive |= "%header" BLOCK(block) {
     grammar->header_block = *block;
     delete block;
 }
 
-directive |= EXTRA_ARG BLOCK(type) varname(name) {
+directive |= "%extra_arg" BLOCK(type) varname(name) {
     grammar->extra_args.push_back(std::make_pair(*type, *name));
 
     delete type;