summary | refs | log | tree | commit | diff | stats
path: root/lex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lex.cpp')
-rw-r--r-- lex.cpp 372
1 files changed, 0 insertions, 372 deletions
diff --git a/lex.cpp b/lex.cpp
deleted file mode 100644
index 59d17f2..0000000
--- a/lex.cpp
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- Copyright (c) 2013-2014, Matthias Schiffer <mschiffer@universe-factory.net>
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#include "lex.hpp"
-
-#include <cerrno>
-#include <cstdlib>
-
-
-/* Number of elements of a statically sized array (total size divided by the size of one element); not meaningful for pointers */
-#define array_size(array) (sizeof(array)/sizeof((array)[0]))
-
-
-namespace solar {
-
-
-/* Table entry associating the spelling of a keyword with a token code */
-struct keyword_t {
- /* keyword text; this is the key compare_keywords() orders entries by */
- const char *keyword;
- /* value lex_keyword() returns when the keyword is matched */
- int token;
-};
-
-/*
-  The keyword list must be sorted in strcmp() order of the keyword field:
-  lex_keyword() searches it with bsearch() using compare_keywords().
-
-  The list is currently empty, so every lookup in it fails.
-  NOTE(review): an empty initializer for an array of unknown bound is presumably
-  accepted only as a compiler extension - confirm before porting.
-*/
-static const keyword_t keywords[] = {
-};
-
-/* bsearch() comparison callback: orders two keyword_t entries by their keyword text */
-static int compare_keywords(const void *v1, const void *v2) {
-	const keyword_t *lhs = static_cast<const keyword_t*>(v1);
-	const keyword_t *rhs = static_cast<const keyword_t*>(v2);
-
-	return std::strcmp(lhs->keyword, rhs->keyword);
-}
-
-
-/*
-  Refills the input buffer from the file.
-
-  Input in front of the current position is discarded by moving the remaining
-  bytes to the beginning of the buffer; the free space behind them is then
-  filled with a single fread() call.
-
-  Returns true if at least one new byte was read. Returns false when the
-  buffer has no free space left, or when fread() delivered nothing (end of
-  file or read error; callers tell these apart using ferror()).
-*/
-bool lex_t::advance() {
-	if (start > 0) {
-		/* the regions may overlap, so memmove() is required here */
-		std::memmove(buffer, buffer + start, end - start);
-		end -= start;
-		start = 0;
-	}
-
-	const size_t space = sizeof(buffer) - end;
-	if (space == 0)
-		return false;
-
-	const size_t got = std::fread(buffer + end, 1, space, file);
-	end += got;
-
-	return got != 0;
-}
-
-/*
-  Steps over the current character, keeping the source location up to date.
-
-  With move set, the character is dropped (start is advanced); otherwise it is
-  added to the pending token (tok_len is increased).
-
-  Returns false if there was no current character to step over. When the step
-  reaches the end of the buffered data, the result of advance() is returned,
-  i.e. whether more input could be loaded.
-*/
-bool lex_t::next(bool move) {
-	if (start + tok_len >= end)
-		return false;
-
-	if (current() != '\n') {
-		loc.last_column++;
-	}
-	else {
-		/* a newline starts the next line at column 0 */
-		loc.last_column = 0;
-		loc.last_line++;
-	}
-
-	if (move)
-		start++;
-	else
-		tok_len++;
-
-	/* only try to refill once the buffered data is used up */
-	return (start + tok_len < end) || advance();
-}
-
-/*
-  Discards the pending token: the read position moves past it and the token
-  length is reset. consume_needspace becomes the new value of the needspace
-  flag, which the token lexers check before accepting another token.
-*/
-void lex_t::consume(bool consume_needspace) {
-	needspace = consume_needspace;
-
-	start += tok_len;
-	tok_len = 0;
-}
-
-/* Stores the message "I/O error" in value->error; always returns -1 */
-int lex_t::io_error(value_t *value) {
-	value->error = "I/O error";
-
-	return -1;
-}
-
-/*
-  Reports a syntax error through value->error and returns -1.
-
-  If the error indicator of the input file is set, the failure is reported as
-  an I/O error instead.
-*/
-int lex_t::syntax_error(value_t *value) {
-	if (!std::ferror(file)) {
-		value->error = "syntax error";
-		return -1;
-	}
-
-	return io_error(value);
-}
-
-/*
-  Skips a block comment up to and including the closing star-slash sequence.
-
-  Expects the current character to be the '*' of the comment opener. That
-  character is not remembered as "previous" character, so it cannot be taken
-  for the start of the terminator.
-
-  Returns 0 after the comment has been skipped. If the input ends first, -1 is
-  returned with value->error set to "unterminated block comment" (or to the
-  I/O error message when the file's error indicator is set).
-*/
-int lex_t::consume_comment(value_t *value) {
-	for (char last = 0; next(true); last = current()) {
-		if (last != '*' || current() != '/')
-			continue;
-
-		/* step over the closing '/' and drop the whole comment */
-		next(true);
-		consume(false);
-		return 0;
-	}
-
-	if (std::ferror(file))
-		return io_error(value);
-
-	value->error = "unterminated block comment";
-	return -1;
-}
-
-/*
-  Reports a string literal that is still open when the input ends.
-
-  Returns -1 with value->error set to "unterminated string", or the result of
-  io_error() if the file's error indicator is set.
-*/
-int lex_t::unterminated_string(value_t *value) {
-	const bool read_failed = std::ferror(file);
-
-	if (read_failed)
-		return io_error(value);
-
-	value->error = "unterminated string";
-	return -1;
-}
-
-/*
-  Lexes a double-quoted string literal; the current character must be the
-  opening quote.
-
-  A backslash makes the following character part of the string literally (so
-  \" and \\ yield a quote and a backslash); a backslash directly followed by a
-  newline is dropped together with the newline. No other escape sequences are
-  translated.
-
-  On success, value->str receives a newly allocated copy of the string
-  contents (allocated by strndup(), so presumably to be released with free()),
-  and TOK_STRING is returned.
-
-  On failure, -1 is returned and value->error is set: "syntax error" if a
-  token is not allowed at this position (needspace), "unterminated string" or
-  "I/O error" if the input ends inside the literal, and "out of memory" if an
-  allocation fails.
-*/
-int lex_t::lex_string(value_t *value) {
-	if (needspace)
-		return syntax_error(value);
-
-	size_t len = 1024;
-	size_t pos = 0;
-
-	char *buf = static_cast<char*>(std::malloc(len));
-	if (!buf) {
-		value->error = "out of memory";
-		return -1;
-	}
-
-	while (true) {
-		if (!next(true)) {
-			std::free(buf);
-			return unterminated_string(value);
-		}
-
-		char cur = current();
-
-		if (cur == '"')
-			break;
-
-		if (cur == '\\') {
-			if (!next(true)) {
-				std::free(buf);
-				return unterminated_string(value);
-			}
-
-			cur = current();
-
-			/* escaped newline: line continuation, contributes nothing */
-			if (cur == '\n')
-				continue;
-		}
-
-		if (pos >= len) {
-			/* keep the old pointer until realloc() has succeeded, so it can still be freed */
-			char *grown = static_cast<char*>(std::realloc(buf, 2*len));
-			if (!grown) {
-				std::free(buf);
-				value->error = "out of memory";
-				return -1;
-			}
-
-			buf = grown;
-			len *= 2;
-		}
-
-		buf[pos++] = cur;
-	}
-
-	char *str = strndup(buf, pos);
-	std::free(buf);
-
-	if (!str) {
-		value->error = "out of memory";
-		return -1;
-	}
-
-	value->str = str;
-
-	/* step over the closing quote */
-	next(true);
-	consume(true);
-
-	return TOK_STRING;
-}
-
-/*
-  Lexes an unsigned integer literal; the current character must be its first
-  digit.
-
-  All following decimal digits, the letter 'x' and the hex digits a-f/A-F are
-  collected; the actual validation and conversion is done by strtoull() with
-  base 0 (decimal, 0-prefixed octal, 0x-prefixed hexadecimal).
-
-  On success, value->number holds the converted value and TOK_UINT is
-  returned. A token that strtoull() does not accept completely, or whose value
-  does not fit into an unsigned long long, is rejected via syntax_error(),
-  as is a number at a position where no token is allowed (needspace).
-*/
-int lex_t::lex_number(value_t *value) {
-	if (needspace)
-		return syntax_error(value);
-
-	while (next(false)) {
-		const char cur = current();
-
-		const bool decimal = (cur >= '0' && cur <= '9');
-		const bool hex = (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F'));
-
-		if (!decimal && !hex)
-			break;
-	}
-
-	/* assumes get_token() returns a NUL-terminated heap copy of the pending token - TODO confirm */
-	char *token = get_token();
-	char *endptr;
-
-	/* strtoull() signals overflow only through errno, so it has to be cleared first */
-	errno = 0;
-	value->number = std::strtoull(token, &endptr, 0);
-
-	const bool ok = !*endptr && errno != ERANGE;
-	std::free(token);
-
-	if (!ok)
-		return syntax_error(value);
-
-	consume(true);
-
-	return TOK_UINT;
-}
-
-/*
-  Lexes a keyword made up of lowercase letters, digits and dashes and looks it
-  up in the keyword table.
-
-  Returns the token code of the matching table entry. A word that is not in
-  the table, or a keyword at a position where no token is allowed (needspace),
-  is reported via syntax_error().
-*/
-int lex_t::lex_keyword(value_t *value) {
-	if (needspace)
-		return syntax_error(value);
-
-	while (next(false)) {
-		const char cur = current();
-
-		const bool lower = (cur >= 'a' && cur <= 'z');
-		const bool digit = (cur >= '0' && cur <= '9');
-
-		if (lower || digit || cur == '-')
-			continue;
-
-		break;
-	}
-
-	char *token = get_token();
-
-	/* search key: only the keyword field is compared, the token code stays 0 */
-	const keyword_t key = { token, 0 };
-	const void *found = bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords);
-
-	free(token);
-
-	if (!found)
-		return syntax_error(value);
-
-	consume(true);
-
-	return static_cast<const keyword_t*>(found)->token;
-}
-
-/*
-  Lexes a grammar symbol; the current character must be its first letter.
-
-  A terminal symbol continues over uppercase letters, digits and underscores;
-  a nonterminal symbol continues over lowercase letters, digits and
-  underscores.
-
-  On success, value->str receives the symbol name as returned by get_token()
-  (ownership passes to the caller), and TOK_TERM or TOK_NONTERM is returned
-  depending on terminal. A symbol at a position where no token is allowed
-  (needspace) is reported via syntax_error().
-*/
-int lex_t::lex_symbol(value_t *value, bool terminal) {
-	if (needspace)
-		return syntax_error(value);
-
-	while (next(false)) {
-		const char cur = current();
-
-		const bool upper = (cur >= 'A' && cur <= 'Z');
-		const bool lower = (cur >= 'a' && cur <= 'z');
-		const bool common = ((cur >= '0' && cur <= '9') || cur == '_');
-
-		if (common || (terminal ? upper : lower))
-			continue;
-
-		break;
-	}
-
-	value->str = get_token();
-
-	/*
-	  Like the other token lexers, drop the token from the input and require a
-	  separator before the next one. Without this, tok_len stays set and
-	  needspace stays clear, so a directly following symbol (e.g. "fooBAR")
-	  would be accepted and would include this token's text.
-	*/
-	consume(true);
-
-	return terminal ? TOK_TERM : TOK_NONTERM;
-}
-
-/*
-  Creates a lexer reading from file0. The buffer starts out empty and is
-  filled once right away; the result of that first read is not checked here.
-
-  NOTE(review): loc is not mentioned in the initializer list - presumably its
-  type initializes itself; confirm in lex.hpp.
-*/
-lex_t::lex_t(FILE *file0)
-	: file(file0), needspace(false), start(0), end(0), tok_len(0)
-{
-	advance();
-}
-
-/*
-  Scans the next token from the buffered input.
-
-  Whitespace, block comments and line comments (introduced by "//" or '#') are
-  skipped. The return value is one of:
-
-   - the character itself for the punctuation tokens ; : { } | =
-   - TOK_CHAR for a character literal (the character is stored in value->number)
-   - the result of lex_string(), lex_number() or lex_symbol() for strings,
-     numbers and symbols (lowercase initial: nonterminal, uppercase initial:
-     terminal)
-   - 0 when the input is used up and the file's error indicator is not set
-   - the result of syntax_error()/io_error() otherwise, which set value->error
-
-  lex_keyword() is not called from this function.
-*/
-int lex_t::lex(value_t *value) {
- int token;
-
- while (end > start) {
- /* the next token begins right behind the last character processed so far */
- loc.first_line = loc.last_line;
- loc.first_column = loc.last_column+1;
-
- switch (current()) {
- /* whitespace: skip it; consume(false) also clears needspace */
- case ' ':
- case '\n':
- case '\t':
- case '\r':
- next(true);
- consume(false);
- continue;
-
- /* single-character tokens are returned as their character code */
- case ';':
- case ':':
- case '{':
- case '}':
- case '|':
- case '=':
- token = current();
- next(true);
- consume(false);
- return token;
-
- /* a slash is only valid as the start of a block or line comment */
- case '/':
- if (!next(true))
- return syntax_error(value);
-
- if (current() == '*') {
- /* consume_comment() returns 0 after skipping the comment, non-zero on error */
- token = consume_comment(value);
- if (token)
- return token;
-
- continue;
- }
-
- if (current() != '/')
- return syntax_error(value);
-
- /* fall-through */
- case '#':
- /* line comment: skip everything up to the end of the line (or of the input) */
- while (next(true)) {
- if (current() == '\n')
- break;
- }
-
- /* step over the newline itself */
- next(true);
- consume(false);
- continue;
-
- /* character literal: exactly one character between single quotes, no escape sequences */
- case '\'':
- if (!next(true))
- return syntax_error(value);
-
- /* NOTE(review): if current() returns a plain (signed) char, bytes >= 0x80 are presumably sign-extended here - confirm */
- value->number = current();
-
- if (!next(true) || current() != '\'')
- return syntax_error(value);
-
- next(true);
-
- consume(false);
-
- return TOK_CHAR;
-
- case '"':
- return lex_string(value);
-
- /* NOTE(review): case ranges ('0' ... '9') are a compiler extension, not standard C++ */
- case '0' ... '9':
- return lex_number(value);
-
- case 'a' ... 'z':
- return lex_symbol(value, false);
-
- case 'A' ... 'Z':
- return lex_symbol(value, true);
-
- default:
- return syntax_error(value);
- }
- }
-
- /* no input left: tell a read error apart from a regular end of file */
- if (ferror(file))
- return io_error(value);
-
- return 0;
-}
-
-}