Don't parse keywords in the lexer

This commit is contained in:
Matthias Schiffer 2015-04-10 18:01:10 +02:00
parent 650fff74de
commit 634e5db209
5 changed files with 1076 additions and 1210 deletions

View file

@ -34,26 +34,6 @@
namespace solar { namespace solar {
/* Maps a directive keyword to the parser token it produces. */
struct keyword_t {
	const char *keyword; /* keyword text, including the leading '%' */
	int token;           /* parser token value returned on a match */
};
/* The keyword list must be sorted in strcmp() order:
 * lex_keyword() looks entries up with bsearch(). */
static const keyword_t keywords[] = {
	{"%extra_arg", TOK_EXTRA_ARG},
	{"%header", TOK_HEADER},
	{"%source", TOK_SOURCE},
	{"%type", TOK_TYPE},
};
static int compare_keywords(const void *v1, const void *v2) {
const keyword_t *k1 = static_cast<const keyword_t*>(v1), *k2 = static_cast<const keyword_t*>(v2);
return std::strcmp(k1->keyword, k2->keyword);
}
bool lex_t::advance() { bool lex_t::advance() {
if (start > 0) { if (start > 0) {
std::memmove(buffer, buffer+start, end - start); std::memmove(buffer, buffer+start, end - start);
@ -183,61 +163,6 @@ int lex_t::lex_string(parse_token_value_t *value) {
return TOK_STRING; return TOK_STRING;
} }
/*
int lex_t::lex_number(parse_token_value_t *value) {
if (needspace)
return syntax_error(value);
while (next(false)) {
char cur = current();
if (cur >= '0' && cur <= '9')
continue;
if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F'))
continue;
break;
}
char *endptr, *token = get_token();
value->number = std::strtoull(token, &endptr, 0);
bool ok = !*endptr;
free(token);
if (!ok)
return syntax_error(value);
consume(true);
return TOK_UINT;
}*/
/* Lexes a '%'-prefixed directive keyword and translates it to its
 * parser token by binary search in the (sorted) keywords table.
 *
 * Returns the matched keyword's token value, or syntax_error(value)
 * when a space was required first (needspace) or the identifier is
 * not a known keyword. */
int lex_t::lex_keyword(parse_token_value_t *value) {
	if (needspace)
		return syntax_error(value);

	/* consume the [a-z0-9_]* tail of the identifier */
	while (next(false)) {
		char cur = current();
		if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '_'))
			break;
	}

	/* get_token() appears to transfer ownership of the string — deleted
	 * below once the lookup key no longer aliases its buffer. */
	std::string *token = get_token();

	/* Standard positional aggregate initialization: the previous form
	 * ({ .keyword = ..., .token = 0 }) used designated initializers,
	 * a C99/GNU extension that is not valid C++ before C++20. */
	const keyword_t key = { token->c_str(), 0 };
	const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
	delete token;

	if (!ret)
		return syntax_error(value);

	consume(true);
	return ret->token;
}
int lex_t::unterminated_block(parse_token_value_t *value) { int lex_t::unterminated_block(parse_token_value_t *value) {
if (ferror(file)) if (ferror(file))
return io_error(value); return io_error(value);
@ -366,8 +291,18 @@ int lex_t::lex(parse_token_value_t *value) {
case '\r': case '\r':
next(true); next(true);
consume(false); consume(false);
dumb_mode = false;
continue; continue;
}
if (dumb_mode) {
token = current();
next(true);
consume(false);
return token;
}
switch (current()) {
case ';': case ';':
case ':': case ':':
case '|': case '|':
@ -432,7 +367,12 @@ int lex_t::lex(parse_token_value_t *value) {
return lex_symbol(value); return lex_symbol(value);
case '%': case '%':
return lex_keyword(value); dumb_mode = true;
token = current();
next(true);
consume(false);
return token;
default: default:
return syntax_error(value); return syntax_error(value);

View file

@ -51,6 +51,7 @@ private:
std::FILE *file; std::FILE *file;
bool needspace; bool needspace;
bool dumb_mode;
size_t start; size_t start;
size_t end; size_t end;
@ -69,8 +70,6 @@ private:
int unterminated_string(parse_token_value_t *value); int unterminated_string(parse_token_value_t *value);
int lex_string(parse_token_value_t *value); int lex_string(parse_token_value_t *value);
//int lex_number(parse_token_value_t *value);
int lex_keyword(parse_token_value_t *value);
int lex_block(parse_token_value_t *value); int lex_block(parse_token_value_t *value);
int lex_symbol(parse_token_value_t *value); int lex_symbol(parse_token_value_t *value);

File diff suppressed because it is too large Load diff

View file

@ -6,14 +6,10 @@
enum parse_token_t { enum parse_token_t {
TOK_BLOCK = 256, TOK_BLOCK = 256,
TOK_CHAR = 257, TOK_CHAR = 257,
TOK_EXTRA_ARG = 258, TOK_STRING = 258,
TOK_HEADER = 259, TOK_SYMBOL = 259,
TOK_SOURCE = 260, TOK_SYMBOL_LC = 260,
TOK_STRING = 261, TOK_SYMBOL_UC = 261,
TOK_SYMBOL = 262,
TOK_SYMBOL_LC = 263,
TOK_SYMBOL_UC = 264,
TOK_TYPE = 265,
}; };
typedef struct parse_token_value { typedef struct parse_token_value {

View file

@ -34,14 +34,14 @@ directive |= rule(rule) {
delete rule; delete rule;
} }
directive |= TYPE SYMBOL_LC(nonterm) BLOCK(type) { directive |= "%type" SYMBOL_LC(nonterm) BLOCK(type) {
grammar->nonterm_types.insert(std::make_pair(*nonterm, *type)); grammar->nonterm_types.insert(std::make_pair(*nonterm, *type));
delete nonterm; delete nonterm;
delete type; delete type;
} }
directive |= TYPE term(term) BLOCK(type) varname(name) { directive |= "%type" term(term) BLOCK(type) varname(name) {
grammar->term_types.insert(std::make_pair(*term, std::make_pair(*type, *name))); grammar->term_types.insert(std::make_pair(*term, std::make_pair(*type, *name)));
delete term; delete term;
@ -49,17 +49,17 @@ directive |= TYPE term(term) BLOCK(type) varname(name) {
delete name; delete name;
} }
directive |= SOURCE BLOCK(block) { directive |= "%source" BLOCK(block) {
grammar->source_block = *block; grammar->source_block = *block;
delete block; delete block;
} }
directive |= HEADER BLOCK(block) { directive |= "%header" BLOCK(block) {
grammar->header_block = *block; grammar->header_block = *block;
delete block; delete block;
} }
directive |= EXTRA_ARG BLOCK(type) varname(name) { directive |= "%extra_arg" BLOCK(type) varname(name) {
grammar->extra_args.push_back(std::make_pair(*type, *name)); grammar->extra_args.push_back(std::make_pair(*type, *name));
delete type; delete type;