summary | refs | log | tree | commit | diff | stats
path: root/lex.cpp
diff options
context:
space:
mode:
author Matthias Schiffer <mschiffer@universe-factory.net> 2015-03-25 23:52:08 +0100
committer Matthias Schiffer <mschiffer@universe-factory.net> 2015-03-25 23:52:08 +0100
commit 09972613c44cf9968afc17d7294aa5e77fc29b9c (patch)
tree 3eb8dcd846f58b781a271c1f5c39f1dc27e06c18 /lex.cpp
download solar-09972613c44cf9968afc17d7294aa5e77fc29b9c.tar
solar-09972613c44cf9968afc17d7294aa5e77fc29b9c.zip
Some initial work
Diffstat (limited to 'lex.cpp')
-rw-r--r-- lex.cpp 372
1 files changed, 372 insertions, 0 deletions
diff --git a/lex.cpp b/lex.cpp
new file mode 100644
index 0000000..59d17f2
--- /dev/null
+++ b/lex.cpp
@@ -0,0 +1,372 @@
+/*
+ Copyright (c) 2013-2014, Matthias Schiffer <mschiffer@universe-factory.net>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "lex.hpp"
+
+#include <cerrno>
+#include <cstdlib>
+
+
+/* Element count of an array: its total size divided by the size of one element (only meaningful for true arrays, not pointers). */
+#define array_size(array) (sizeof(array)/sizeof((array)[0]))
+
+
+namespace solar {
+
+
+/* One entry of the keyword table: a keyword string paired with a token value. */
+struct keyword_t {
+ const char *keyword; /* string compared by compare_keywords() */
+ int token; /* value lex_keyword() returns when this entry matches */
+};
+
+/*
+ * The keyword list must be sorted in strcmp() order of the keyword field:
+ * lex_keyword() searches it with bsearch() using compare_keywords().
+ * No keywords are defined yet.
+ */
+static const keyword_t keywords[] = {
+};
+
+/*
+ * bsearch() comparison callback for keyword_t entries.
+ *
+ * Both arguments are treated as pointers to keyword_t; the result is the
+ * strcmp() ordering of their keyword strings.
+ */
+static int compare_keywords(const void *v1, const void *v2) {
+    const keyword_t *lhs = static_cast<const keyword_t*>(v1);
+    const keyword_t *rhs = static_cast<const keyword_t*>(v2);
+
+    return std::strcmp(lhs->keyword, rhs->keyword);
+}
+
+
+/*
+ * Refills the input buffer from file.
+ *
+ * Bytes before start are discarded by shifting the unread range
+ * [start, end) to the front of the buffer; the space after end is then
+ * filled with a single fread() call.
+ *
+ * Returns true if at least one byte was read, and false if end has already
+ * reached sizeof(buffer) or fread() returned nothing.
+ */
+bool lex_t::advance() {
+    if (start > 0) {
+        /* source and destination may overlap, hence memmove() */
+        std::memmove(buffer, buffer+start, end - start);
+        end -= start;
+        start = 0;
+    }
+
+    const size_t space = sizeof(buffer) - end;
+    if (space == 0)
+        return false;
+
+    const size_t got = std::fread(buffer+end, 1, space, file);
+    end += got;
+
+    return got != 0;
+}
+
+/*
+ * Steps past the character returned by current().
+ *
+ * With move set the character is dropped (start is incremented); otherwise
+ * it is added to the pending token (tok_len is incremented).  The end
+ * position in loc is updated first: a newline begins a new line and resets
+ * the column, any other character advances the column.
+ *
+ * Returns false if start + tok_len was already at or past end, or if the
+ * new position reaches end and advance() could not read more input.
+ */
+bool lex_t::next(bool move) {
+    if (start + tok_len >= end)
+        return false;
+
+    /* inspect the character before the position changes */
+    if (current() != '\n') {
+        loc.last_column++;
+    }
+    else {
+        loc.last_line++;
+        loc.last_column = 0;
+    }
+
+    if (move)
+        start++;
+    else
+        tok_len++;
+
+    /* only refill once the buffered data is used up */
+    return (start + tok_len < end) || advance();
+}
+
+/*
+ * Discards the pending token: start skips over its tok_len characters and
+ * tok_len is reset to zero.
+ *
+ * consume_needspace is stored in needspace; lex_string(), lex_number(),
+ * lex_keyword() and lex_symbol() report a syntax error when they are
+ * entered while needspace is set.
+ */
+void lex_t::consume(bool consume_needspace) {
+    needspace = consume_needspace;
+
+    start += tok_len;
+    tok_len = 0;
+}
+
+/* Stores the message "I/O error" in value->error and returns -1. */
+int lex_t::io_error(value_t *value) {
+ value->error = "I/O error";
+ return -1;
+}
+
+/*
+ * Reports a lexing failure.
+ *
+ * If the error indicator of file is set, the failure is reported through
+ * io_error() instead; otherwise value->error is set to "syntax error".
+ * Returns -1 in both cases.
+ */
+int lex_t::syntax_error(value_t *value) {
+    if (!std::ferror(file)) {
+        value->error = "syntax error";
+        return -1;
+    }
+
+    return io_error(value);
+}
+
+/*
+ * Skips the remainder of a block comment.
+ *
+ * lex() calls this while current() is the '*' of the opening delimiter.
+ * Characters are dropped until a '*' immediately followed by '/' has been
+ * seen; the opening '*' itself is never remembered, so it cannot double as
+ * the closing one.
+ *
+ * Returns 0 once the closing delimiter has been skipped.  If the input ends
+ * first, returns -1 with value->error set to "I/O error" (stream error) or
+ * "unterminated block comment".
+ */
+int lex_t::consume_comment(value_t *value) {
+    char last = 0;
+
+    /* last trails one character behind current() */
+    for (; next(true); last = current()) {
+        if (last != '*' || current() != '/')
+            continue;
+
+        next(true);
+        consume(false);
+        return 0;
+    }
+
+    if (std::ferror(file))
+        return io_error(value);
+
+    value->error = "unterminated block comment";
+    return -1;
+}
+
+/*
+ * Reports that the input ended inside a string literal.
+ *
+ * If the error indicator of file is set, the failure is reported through
+ * io_error(); otherwise value->error is set to "unterminated string".
+ * Returns -1 in both cases.
+ */
+int lex_t::unterminated_string(value_t *value) {
+    /* std::ferror, like the sibling error helpers: the unqualified name is
+       not guaranteed to be declared by <cstdio> */
+    if (std::ferror(file))
+        return io_error(value);
+
+    value->error = "unterminated string";
+    return -1;
+}
+
+/*
+ * Lexes a double-quoted string literal; lex() calls this while current()
+ * is the opening quote.
+ *
+ * A backslash makes the following character literal (so an escaped quote
+ * does not end the string), except that a backslash followed by a newline
+ * contributes nothing to the string.
+ *
+ * On success value->str receives a newly allocated copy of the contents
+ * (made with strndup()), the closing quote is skipped, needspace is set and
+ * TOK_STRING is returned.
+ *
+ * Returns -1 with value->error set when needspace is set on entry (syntax
+ * error), when the input ends before the closing quote, or when memory
+ * cannot be allocated.
+ */
+int lex_t::lex_string(value_t *value) {
+    if (needspace)
+        return syntax_error(value);
+
+    size_t len = 1024;
+    size_t pos = 0;
+    char *buf = static_cast<char*>(std::malloc(len));
+
+    if (!buf) {
+        value->error = "out of memory";
+        return -1;
+    }
+
+    while (true) {
+        if (!next(true)) {
+            std::free(buf);
+            return unterminated_string(value);
+        }
+
+        char cur = current();
+
+        if (cur == '"')
+            break;
+
+        if (cur == '\\') {
+            if (!next(true)) {
+                std::free(buf);
+                return unterminated_string(value);
+            }
+
+            cur = current();
+
+            /* backslash-newline: nothing is stored */
+            if (cur == '\n')
+                continue;
+        }
+
+        if (pos >= len) {
+            /* keep the old pointer until realloc() has succeeded, so the
+               buffer can still be freed on failure */
+            char *grown = static_cast<char*>(std::realloc(buf, 2*len));
+
+            if (!grown) {
+                std::free(buf);
+                value->error = "out of memory";
+                return -1;
+            }
+
+            buf = grown;
+            len *= 2;
+        }
+
+        buf[pos++] = cur;
+    }
+
+    char *str = strndup(buf, pos);
+    std::free(buf);
+
+    if (!str) {
+        value->error = "out of memory";
+        return -1;
+    }
+
+    value->str = str;
+
+    /* step over the closing quote */
+    next(true);
+    consume(true);
+
+    return TOK_STRING;
+}
+
+/*
+ * Lexes an unsigned integer literal.
+ *
+ * The token is extended over decimal digits, the letter 'x' and the hex
+ * digits a-f/A-F, then converted with strtoull() in base 0 (so the usual
+ * 0x and 0 prefixes select hexadecimal and octal).
+ *
+ * On success value->number holds the converted value, the token is
+ * consumed with needspace set, and TOK_UINT is returned.
+ *
+ * Returns -1 with value->error set when needspace is set on entry, when
+ * strtoull() does not accept the whole token, or when the value does not
+ * fit into an unsigned long long.
+ */
+int lex_t::lex_number(value_t *value) {
+    if (needspace)
+        return syntax_error(value);
+
+    while (next(false)) {
+        const char cur = current();
+        const bool digit = cur >= '0' && cur <= '9';
+        const bool hex = cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F');
+
+        if (!digit && !hex)
+            break;
+    }
+
+    char *token = get_token();
+    char *endptr;
+
+    /* strtoull() reports overflow only through errno */
+    errno = 0;
+    value->number = std::strtoull(token, &endptr, 0);
+
+    /* read both results before free(), which invalidates endptr and may
+       change errno */
+    const bool overflow = (errno == ERANGE);
+    const bool ok = !*endptr;
+    std::free(token);
+
+    if (!ok)
+        return syntax_error(value);
+
+    if (overflow) {
+        value->error = "integer literal out of range";
+        return -1;
+    }
+
+    consume(true);
+
+    return TOK_UINT;
+}
+
+/*
+ * Lexes a keyword.
+ *
+ * The token is extended over lower-case letters, digits and '-', and then
+ * looked up in the keywords table with bsearch().
+ *
+ * On a match the token is consumed with needspace set and the matching
+ * entry's token value is returned.  Returns -1 with value->error set when
+ * needspace is set on entry or when the token is not in the table.
+ */
+int lex_t::lex_keyword(value_t *value) {
+    if (needspace)
+        return syntax_error(value);
+
+    while (next(false)) {
+        const char cur = current();
+        const bool lower = cur >= 'a' && cur <= 'z';
+        const bool digit = cur >= '0' && cur <= '9';
+
+        if (!(lower || digit || cur == '-'))
+            break;
+    }
+
+    char *token = get_token();
+
+    /* positional initialisation: designated initialisers are not standard
+       C++ before C++20; only the keyword field is used for the lookup */
+    const keyword_t key = { token, 0 };
+    const void *found = std::bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords);
+    std::free(token);
+
+    /* found points into keywords, so it stays valid after token is freed */
+    const keyword_t *ret = static_cast<const keyword_t*>(found);
+
+    if (!ret)
+        return syntax_error(value);
+
+    consume(true);
+
+    return ret->token;
+}
+
+/*
+ * Lexes a grammar symbol name.
+ *
+ * With terminal set the token is extended over upper-case letters, digits
+ * and '_'; otherwise over lower-case letters, digits and '_'.
+ *
+ * On success value->str receives the string returned by get_token(), the
+ * token is consumed with needspace set, and TOK_TERM (terminal) or
+ * TOK_NONTERM is returned.  Returns -1 with value->error set when needspace
+ * is set on entry.
+ */
+int lex_t::lex_symbol(value_t *value, bool terminal) {
+    if (needspace)
+        return syntax_error(value);
+
+    while (next(false)) {
+        const char cur = current();
+        const bool upper = cur >= 'A' && cur <= 'Z';
+        const bool lower = cur >= 'a' && cur <= 'z';
+        const bool common = (cur >= '0' && cur <= '9') || cur == '_';
+
+        /* only letters of the symbol's own case continue the token */
+        if (!(common || (terminal ? upper : lower)))
+            break;
+    }
+
+    value->str = get_token();
+
+    /* Like lex_number() and lex_keyword(), release the token once it has
+       been copied; otherwise tok_len stays non-zero and needspace stays
+       clear, so a symbol following directly would be appended to this one. */
+    consume(true);
+
+    return terminal ? TOK_TERM : TOK_NONTERM;
+}
+
+/*
+ * Creates a lexer reading from file0 with an empty buffer and no pending
+ * token, and performs the first buffer fill.  The result of advance() is
+ * ignored, so an empty or unreadable stream simply leaves end equal to
+ * start.
+ *
+ * NOTE(review): loc is not initialised here; presumably its type takes care
+ * of that itself -- confirm in lex.hpp.
+ */
+lex_t::lex_t(FILE *file0)
+    : file(file0),
+      needspace(false),
+      start(0),
+      end(0),
+      tok_len(0)
+{
+    advance();
+}
+
+/*
+ * Returns the next token from the input.
+ *
+ * Whitespace and comments (block comments, and line comments introduced by
+ * "//" or '#') are skipped.  The punctuation characters ; : { } | = are
+ * returned as their own character value.  A character literal yields
+ * TOK_CHAR with the character in value->number.  Strings, numbers and
+ * symbol names are handled by lex_string(), lex_number() and lex_symbol().
+ *
+ * Returns -1 with value->error set on failure, and 0 once no buffered
+ * input is left and the stream has no error.
+ */
+int lex_t::lex(value_t *value) {
+ int token;
+
+ while (end > start) {
+ /* the next token starts right after the end of the previous one */
+ loc.first_line = loc.last_line;
+ loc.first_column = loc.last_column+1;
+
+ switch (current()) {
+ /* whitespace: drop it and look again */
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\r':
+ next(true);
+ consume(false);
+ continue;
+
+ /* single-character tokens: the token value is the character itself */
+ case ';':
+ case ':':
+ case '{':
+ case '}':
+ case '|':
+ case '=':
+ token = current();
+ next(true);
+ consume(false);
+ return token;
+
+ /* a slash must start either a block comment or a line comment */
+ case '/':
+ if (!next(true))
+ return syntax_error(value);
+
+ if (current() == '*') {
+ /* non-zero means the comment was not terminated or reading failed */
+ token = consume_comment(value);
+ if (token)
+ return token;
+
+ continue;
+ }
+
+ if (current() != '/')
+ return syntax_error(value);
+
+ /* fall-through */
+ case '#':
+ /* line comment: drop everything up to the newline, then the newline itself */
+ while (next(true)) {
+ if (current() == '\n')
+ break;
+ }
+
+ next(true);
+ consume(false);
+ continue;
+
+ /* character literal: exactly one character between single quotes, taken as-is (no escape handling) */
+ case '\'':
+ if (!next(true))
+ return syntax_error(value);
+
+ value->number = current();
+
+ if (!next(true) || current() != '\'')
+ return syntax_error(value);
+
+ next(true);
+
+ consume(false);
+
+ return TOK_CHAR;
+
+ case '"':
+ return lex_string(value);
+
+ case '0' ... '9':
+ return lex_number(value);
+
+ /* a leading lower-case letter starts a non-terminal name; lex_keyword() is not called from here */
+ case 'a' ... 'z':
+ return lex_symbol(value, false);
+
+ /* a leading upper-case letter starts a terminal name */
+ case 'A' ... 'Z':
+ return lex_symbol(value, true);
+
+ default:
+ return syntax_error(value);
+ }
+ }
+
+ /* no buffered input left: tell a read error apart from a regular end of input */
+ if (ferror(file))
+ return io_error(value);
+
+ return 0;
+}
+
+}