Don't parse keywords in the lexer

This commit is contained in:
Matthias Schiffer 2015-04-10 18:01:10 +02:00
parent 650fff74de
commit 634e5db209
5 changed files with 1076 additions and 1210 deletions

View file

@ -34,26 +34,6 @@
namespace solar { namespace solar {
/* Maps a directive keyword to the parser token it produces. */
struct keyword_t {
	const char *keyword; /* keyword text, including the leading '%' */
	int token;           /* parser token value returned on a match */
};
/* The keyword list must be sorted in strcmp() order:
 * lex_keyword() looks entries up with bsearch(). */
static const keyword_t keywords[] = {
	{"%extra_arg", TOK_EXTRA_ARG},
	{"%header", TOK_HEADER},
	{"%source", TOK_SOURCE},
	{"%type", TOK_TYPE},
};
static int compare_keywords(const void *v1, const void *v2) {
const keyword_t *k1 = static_cast<const keyword_t*>(v1), *k2 = static_cast<const keyword_t*>(v2);
return std::strcmp(k1->keyword, k2->keyword);
}
bool lex_t::advance() { bool lex_t::advance() {
if (start > 0) { if (start > 0) {
std::memmove(buffer, buffer+start, end - start); std::memmove(buffer, buffer+start, end - start);
@ -183,61 +163,6 @@ int lex_t::lex_string(parse_token_value_t *value) {
return TOK_STRING; return TOK_STRING;
} }
/*
int lex_t::lex_number(parse_token_value_t *value) {
if (needspace)
return syntax_error(value);
while (next(false)) {
char cur = current();
if (cur >= '0' && cur <= '9')
continue;
if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F'))
continue;
break;
}
char *endptr, *token = get_token();
value->number = std::strtoull(token, &endptr, 0);
bool ok = !*endptr;
free(token);
if (!ok)
return syntax_error(value);
consume(true);
return TOK_UINT;
}*/
/* Lexes a '%'-prefixed directive keyword and translates it to its
 * parser token by binary search in the (sorted) keywords table.
 *
 * Returns the matched keyword's token value, or syntax_error(value)
 * when a space was required first (needspace) or the identifier is
 * not a known keyword. */
int lex_t::lex_keyword(parse_token_value_t *value) {
	if (needspace)
		return syntax_error(value);

	/* consume the [a-z0-9_]* tail of the identifier */
	while (next(false)) {
		char cur = current();
		if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '_'))
			break;
	}

	/* get_token() appears to transfer ownership of the string — deleted
	 * below once the lookup key no longer aliases its buffer. */
	std::string *token = get_token();

	/* Standard positional aggregate initialization: the previous form
	 * ({ .keyword = ..., .token = 0 }) used designated initializers,
	 * a C99/GNU extension that is not valid C++ before C++20. */
	const keyword_t key = { token->c_str(), 0 };
	const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
	delete token;

	if (!ret)
		return syntax_error(value);

	consume(true);
	return ret->token;
}
int lex_t::unterminated_block(parse_token_value_t *value) { int lex_t::unterminated_block(parse_token_value_t *value) {
if (ferror(file)) if (ferror(file))
return io_error(value); return io_error(value);
@ -366,8 +291,18 @@ int lex_t::lex(parse_token_value_t *value) {
case '\r': case '\r':
next(true); next(true);
consume(false); consume(false);
dumb_mode = false;
continue; continue;
}
if (dumb_mode) {
token = current();
next(true);
consume(false);
return token;
}
switch (current()) {
case ';': case ';':
case ':': case ':':
case '|': case '|':
@ -432,7 +367,12 @@ int lex_t::lex(parse_token_value_t *value) {
return lex_symbol(value); return lex_symbol(value);
case '%': case '%':
return lex_keyword(value); dumb_mode = true;
token = current();
next(true);
consume(false);
return token;
default: default:
return syntax_error(value); return syntax_error(value);

View file

@ -51,6 +51,7 @@ private:
std::FILE *file; std::FILE *file;
bool needspace; bool needspace;
bool dumb_mode;
size_t start; size_t start;
size_t end; size_t end;
@ -69,8 +70,6 @@ private:
int unterminated_string(parse_token_value_t *value); int unterminated_string(parse_token_value_t *value);
int lex_string(parse_token_value_t *value); int lex_string(parse_token_value_t *value);
//int lex_number(parse_token_value_t *value);
int lex_keyword(parse_token_value_t *value);
int lex_block(parse_token_value_t *value); int lex_block(parse_token_value_t *value);
int lex_symbol(parse_token_value_t *value); int lex_symbol(parse_token_value_t *value);

File diff suppressed because it is too large Load diff

View file

@ -6,14 +6,10 @@
enum parse_token_t { enum parse_token_t {
TOK_BLOCK = 256, TOK_BLOCK = 256,
TOK_CHAR = 257, TOK_CHAR = 257,
TOK_EXTRA_ARG = 258, TOK_STRING = 258,
TOK_HEADER = 259, TOK_SYMBOL = 259,
TOK_SOURCE = 260, TOK_SYMBOL_LC = 260,
TOK_STRING = 261, TOK_SYMBOL_UC = 261,
TOK_SYMBOL = 262,
TOK_SYMBOL_LC = 263,
TOK_SYMBOL_UC = 264,
TOK_TYPE = 265,
}; };
typedef struct parse_token_value { typedef struct parse_token_value {

View file

@ -34,14 +34,14 @@ directive |= rule(rule) {
delete rule; delete rule;
} }
directive |= TYPE SYMBOL_LC(nonterm) BLOCK(type) { directive |= "%type" SYMBOL_LC(nonterm) BLOCK(type) {
grammar->nonterm_types.insert(std::make_pair(*nonterm, *type)); grammar->nonterm_types.insert(std::make_pair(*nonterm, *type));
delete nonterm; delete nonterm;
delete type; delete type;
} }
directive |= TYPE term(term) BLOCK(type) varname(name) { directive |= "%type" term(term) BLOCK(type) varname(name) {
grammar->term_types.insert(std::make_pair(*term, std::make_pair(*type, *name))); grammar->term_types.insert(std::make_pair(*term, std::make_pair(*type, *name)));
delete term; delete term;
@ -49,17 +49,17 @@ directive |= TYPE term(term) BLOCK(type) varname(name) {
delete name; delete name;
} }
directive |= SOURCE BLOCK(block) { directive |= "%source" BLOCK(block) {
grammar->source_block = *block; grammar->source_block = *block;
delete block; delete block;
} }
directive |= HEADER BLOCK(block) { directive |= "%header" BLOCK(block) {
grammar->header_block = *block; grammar->header_block = *block;
delete block; delete block;
} }
directive |= EXTRA_ARG BLOCK(type) varname(name) { directive |= "%extra_arg" BLOCK(type) varname(name) {
grammar->extra_args.push_back(std::make_pair(*type, *name)); grammar->extra_args.push_back(std::make_pair(*type, *name));
delete type; delete type;