From 8de90acc6791a62d4b2e48e9bd05daa0d5cfe4b6 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Fri, 27 Mar 2015 03:38:01 +0100
Subject: Generate items from grammar

---
 src/lex.cpp | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 369 insertions(+)
 create mode 100644 src/lex.cpp

(limited to 'src/lex.cpp')
diff --git a/src/lex.cpp b/src/lex.cpp
new file mode 100644
index 0000000..0587689
--- /dev/null
+++ b/src/lex.cpp
@@ -0,0 +1,369 @@
+/*
+  Copyright (c) 2013-2014, Matthias Schiffer <mschiffer@universe-factory.net>
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the documentation
+       and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "lex.hpp"
+
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+
+#define array_size(array) (sizeof(array)/sizeof((array)[0])) /* element count; only meaningful for real arrays, not pointers */
+
+
+namespace solar {
+
+
+struct keyword_t { /* one entry of the keywords[] table searched by lex_keyword() */
+	const char *keyword; /* keyword text; the only field compare_keywords() looks at */
+	int token; /* value lex_keyword() returns when this entry matches */
+};
+
+/* the keyword list must be sorted in std::strcmp() order, because lex_keyword() searches it with bsearch() */
+static const keyword_t keywords[] = { /* no entries in this revision */
+};
+
+static int compare_keywords(const void *v1, const void *v2) { /* bsearch() comparator over keyword_t entries */
+	const keyword_t *k1 = static_cast<const keyword_t*>(v1), *k2 = static_cast<const keyword_t*>(v2);
+	return std::strcmp(k1->keyword, k2->keyword); /* ordering by keyword text only; the token field is ignored */
+}
+
+
+bool lex_t::advance() { /* reads more input into buffer; returns true only if at least one new byte arrived */
+	if (start > 0) { /* slide the unread bytes [start, end) to the front so the free space is at the tail */
+		std::memmove(buffer, buffer+start, end - start);
+		end -= start;
+		start = 0;
+	}
+	/* even after compaction the buffer is completely occupied: nothing can be read */
+	if (end == sizeof(buffer))
+		return false;
+	/* append as many bytes as fit; a count of zero covers both end of file and a read error */
+	size_t l = std::fread(buffer+end, 1, sizeof(buffer) - end, file);
+
+	end += l;
+	return l; /* converted to bool: false when nothing was read */
+}
+
+bool lex_t::next(bool move) { /* steps over one character; false if already at the end of the data or if the step hit the end and advance() read nothing */
+	if (start + tok_len >= end) /* no buffered character left to step over */
+		return false;
+	/* update the location for the character being left; current() is defined outside this file, presumably the one at start + tok_len (confirm in lex.hpp) */
+	if (current() == '\n') {
+		loc.last_column = 0;
+		loc.last_line++;
+	}
+	else {
+		loc.last_column++;
+	}
+
+	if (move)
+		start++; /* skip the character outright */
+	else
+		tok_len++; /* keep it in the pending span of tok_len characters that consume() later skips */
+
+	/* the position reached the end of the buffered data: try to read more */
+	if (start + tok_len >= end)
+		return advance();
+
+	return true;
+}
+
+void lex_t::consume(bool consume_needspace) { /* skips the pending tok_len characters and stores the new needspace flag */
+	start += tok_len;
+	tok_len = 0;
+	/* lex_number(), lex_keyword() and lex_symbol() report a syntax error while needspace is set */
+	needspace = consume_needspace;
+}
+
+int lex_t::io_error(value_t *value) { /* records "I/O error" in value->error and returns -1 */
+	value->error = "I/O error";
+	return -1;
+}
+
+int lex_t::syntax_error(value_t *value) { /* records an error message in value->error and returns -1 */
+	if (std::ferror(file)) /* a failed read takes precedence: report it instead of a syntax error */
+		return io_error(value);
+
+	value->error = "syntax error";
+	return -1;
+}
+
+int lex_t::consume_comment(value_t *value) { /* skips a block comment; returns 0 on success, -1 with value->error set otherwise */
+	char prev = 0; /* previously inspected character; starts at 0 because lex() calls this on the opening '*', which the first next() skips */
+
+	while (next(true)) {
+		if (prev == '*' && current() == '/') {
+			next(true); /* step over the closing '/' as well */
+			consume(false);
+			return 0;
+		}
+
+		prev = current();
+	}
+	/* next() gave up before the closing sequence was seen */
+	if (std::ferror(file))
+		return io_error(value);
+
+	value->error = "unterminated block comment";
+	return -1;
+}
+
+/*
+int lex_t::unterminated_string(value_t *value) {
+	if (ferror(file))
+		return io_error(value);
+
+	value->error = "unterminated string";
+	return -1;
+}
+
+int lex_t::lex_string(value_t *value) {
+	char *buf = NULL;
+	size_t len = 1024;
+	size_t pos = 0;
+
+	if (needspace)
+		return syntax_error(value);
+
+	buf = static_cast<char*>(std::malloc(len));
+
+	while (true) {
+		if (!next(true)) {
+			std::free(buf);
+			return unterminated_string(value);
+		}
+
+		char cur = current();
+
+		if (cur == '"')
+			break;
+
+		if (cur == '\\') {
+			if (!next(true)) {
+				free(buf);
+				return unterminated_string(value);
+			}
+
+			cur = current();
+
+			if (cur == '\n')
+				continue;
+		}
+
+		if (pos >= len) {
+			len *= 2;
+			buf = static_cast<char*>(std::realloc(buf, len));
+		}
+
+		buf[pos++] = cur;
+	}
+
+	value->str = strndup(buf, pos);
+	std::free(buf);
+
+	next(true);
+	consume(true);
+
+	return TOK_STRING;
+	}*/
+
+int lex_t::lex_number(value_t *value) {
+	if (needspace)
+		return syntax_error(value);
+
+	while (next(false)) {
+		char cur = current();
+
+		if (cur >= '0' && cur <= '9')
+			continue;
+
+		if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F'))
+			continue;
+
+		break;
+	}
+
+	char *endptr, *token = get_token();
+	value->number = std::strtoull(token, &endptr, 0);
+
+	bool ok = !*endptr;
+	free(token);
+
+	if (!ok)
+		return syntax_error(value);
+
+	consume(true);
+
+	return TOK_UINT;
+}
+
+int lex_t::lex_keyword(value_t *value) { /* lexes a word and returns the token of its keywords[] entry; -1 with value->error set if there is none */
+	if (needspace)
+		return syntax_error(value);
+
+	while (next(false)) {
+		char cur = current();
+		/* the word ends at the first character that is not a lower-case letter, a digit or '-' */
+		if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '-'))
+			break;
+	}
+
+	char *token = get_token();
+	const keyword_t key = { .keyword = token, .token = 0 }; /* search key: compare_keywords() reads .keyword only */
+	const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
+	free(token); /* ret, if set, points into keywords[] and does not reference the freed text */
+
+	if (!ret)
+		return syntax_error(value);
+
+	consume(true);
+
+	return ret->token;
+}
+
+int lex_t::lex_symbol(value_t *value, bool terminal) {
+	if (needspace)
+		return syntax_error(value);
+
+	while (next(false)) {
+		char cur = current();
+
+		switch (cur) {
+		case 'A' ... 'Z':
+			if (!terminal)
+				break;
+
+			continue;
+
+		case 'a' ... 'z':
+			if (terminal)
+				break;
+
+			continue;
+
+		case '0' ... '9':
+		case '_':
+			continue;
+		}
+
+		break;
+	}
+
+	value->str = get_token();
+	return terminal ? TOK_TERM : TOK_NONTERM;
+}
+
+int lex_t::lex(value_t *value) { /* returns the next token code, 0 once the input is used up, or -1 with value->error set */
+	int token;
+
+	while (end > start) { /* unread bytes remain; whitespace and comments loop around via continue */
+		loc.first_line = loc.last_line; /* the token location begins right after the last character next() stepped over */
+		loc.first_column = loc.last_column+1;
+
+		switch (current()) {
+		case ' ':
+		case '\n':
+		case '\t':
+		case '\r':
+			next(true);
+			consume(false); /* whitespace clears needspace */
+			continue;
+
+		case ';':
+		case ':':
+		case '{':
+		case '}':
+		case '|':
+		case '=':
+			token = current(); /* punctuation is returned as its own character code */
+			next(true);
+			consume(false);
+			return token;
+
+		case '/':
+			if (!next(true))
+				return syntax_error(value);
+
+			if (current() == '*') {
+				token = consume_comment(value);
+				if (token) /* non-zero: the comment was unterminated or reading failed */
+					return token;
+
+				continue;
+			}
+
+			if (current() != '/') /* a single '/' starts nothing valid */
+				return syntax_error(value);
+
+			///* fall-through */
+			//case '#':
+			while (next(true)) { /* line comment: skip until a newline or until next() gives up */
+				if (current() == '\n')
+					break;
+			}
+
+			next(true);
+			consume(false);
+			continue;
+
+		case '\'':
+			if (!next(true))
+				return syntax_error(value);
+
+			value->number = current(); /* character literal: the one character between the quotes, taken as-is */
+
+			if (!next(true) || current() != '\'')
+				return syntax_error(value);
+
+			next(true);
+
+			consume(false);
+
+			return TOK_CHAR;
+
+			//case '"':
+			//return lex_string(value);
+
+		case '0' ... '9':
+			return lex_number(value);
+
+		case 'a' ... 'z':
+			return lex_symbol(value, false); /* lower-case first letter: lexed as TOK_NONTERM */
+
+		case 'A' ... 'Z':
+			return lex_symbol(value, true); /* upper-case first letter: lexed as TOK_TERM */
+
+		default:
+			return syntax_error(value);
+		}
+	}
+
+	if (ferror(file)) /* no data left: tell a read failure apart from a clean end of input */
+		return io_error(value);
+
+	return 0;
+}
+
+}
-- 
cgit v1.2.3