diff options
Diffstat (limited to 'lex.cpp')
-rw-r--r-- | lex.cpp | 372 |
1 files changed, 0 insertions, 372 deletions
diff --git a/lex.cpp b/lex.cpp deleted file mode 100644 index 59d17f2..0000000 --- a/lex.cpp +++ /dev/null @@ -1,372 +0,0 @@ -/* - Copyright (c) 2013-2014, Matthias Schiffer <mschiffer@universe-factory.net> - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#include "lex.hpp" - -#include <cstdlib> - - -#define array_size(array) (sizeof(array)/sizeof((array)[0])) - - -namespace solar { - - -struct keyword_t { - const char *keyword; - int token; -}; - -/* the keyword list must be sorted */ -static const keyword_t keywords[] = { -}; - -static int compare_keywords(const void *v1, const void *v2) { - const keyword_t *k1 = static_cast<const keyword_t*>(v1), *k2 = static_cast<const keyword_t*>(v2); - return std::strcmp(k1->keyword, k2->keyword); -} - - -bool lex_t::advance() { - if (start > 0) { - std::memmove(buffer, buffer+start, end - start); - end -= start; - start = 0; - } - - if (end == sizeof(buffer)) - return false; - - size_t l = std::fread(buffer+end, 1, sizeof(buffer) - end, file); - - end += l; - return l; -} - -bool lex_t::next(bool move) { - if (start + tok_len >= end) - return false; - - if (current() == '\n') { - loc.last_column = 0; - loc.last_line++; - } - else { - loc.last_column++; - } - - if (move) - start++; - else - tok_len++; - - - if (start + tok_len >= end) - return advance(); - - return true; -} - -void lex_t::consume(bool consume_needspace) { - start += tok_len; - tok_len = 0; - - needspace = consume_needspace; -} - -int lex_t::io_error(value_t *value) { - value->error = "I/O error"; - return -1; -} - -int lex_t::syntax_error(value_t *value) { - if (std::ferror(file)) - return io_error(value); - - value->error = "syntax error"; - return -1; -} - -int lex_t::consume_comment(value_t *value) { - char prev = 0; - - while (next(true)) { - if (prev == '*' && current() == '/') { - next(true); - consume(false); - return 0; - } - - prev = current(); - } - - if (std::ferror(file)) - return io_error(value); - - value->error = "unterminated block comment"; - return -1; -} - -int lex_t::unterminated_string(value_t *value) { - if (ferror(file)) - return io_error(value); - - value->error = "unterminated string"; - return -1; -} - -int lex_t::lex_string(value_t *value) { - char *buf = NULL; - size_t len = 1024; - size_t pos = 0; - - if (needspace) - return syntax_error(value); - - buf = static_cast<char*>(std::malloc(len)); - - while (true) { - if (!next(true)) { - std::free(buf); - return unterminated_string(value); - } - - char cur = current(); - - if (cur == '"') - break; - - if (cur == '\\') { - if (!next(true)) { - free(buf); - return unterminated_string(value); - } - - cur = current(); - - if (cur == '\n') - continue; - } - - if (pos >= len) { - len *= 2; - buf = static_cast<char*>(std::realloc(buf, len)); - } - - buf[pos++] = cur; - } - - value->str = strndup(buf, pos); - std::free(buf); - - next(true); - consume(true); - - return TOK_STRING; -} - -int lex_t::lex_number(value_t *value) { - if (needspace) - return syntax_error(value); - - while (next(false)) { - char cur = current(); - - if (cur >= '0' && cur <= '9') - continue; - - if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F')) - continue; - - break; - } - - char *endptr, *token = get_token(); - value->number = std::strtoull(token, &endptr, 0); - - bool ok = !*endptr; - free(token); - - if (!ok) - return syntax_error(value); - - consume(true); - - return TOK_UINT; -} - -int lex_t::lex_keyword(value_t *value) { - if (needspace) - return syntax_error(value); - - while (next(false)) { - char cur = current(); - - if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '-')) - break; - } - - char *token = get_token(); - const keyword_t key = { .keyword = token }; - const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords)); - free(token); - - if (!ret) - return syntax_error(value); - - consume(true); - - return ret->token; -} - -int lex_t::lex_symbol(value_t *value, bool terminal) { - if (needspace) - return syntax_error(value); - - while (next(false)) { - char cur = current(); - - switch (cur) { - case 'A' ... 'Z': - if (!terminal) - break; - - continue; - - case 'a' ... 'z': - if (terminal) - break; - - continue; - - case '0' ... '9': - case '_': - continue; - } - - break; - } - - value->str = get_token(); - return terminal ? TOK_TERM : TOK_NONTERM; -} - -lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) { - advance(); -} - -int lex_t::lex(value_t *value) { - int token; - - while (end > start) { - loc.first_line = loc.last_line; - loc.first_column = loc.last_column+1; - - switch (current()) { - case ' ': - case '\n': - case '\t': - case '\r': - next(true); - consume(false); - continue; - - case ';': - case ':': - case '{': - case '}': - case '|': - case '=': - token = current(); - next(true); - consume(false); - return token; - - case '/': - if (!next(true)) - return syntax_error(value); - - if (current() == '*') { - token = consume_comment(value); - if (token) - return token; - - continue; - } - - if (current() != '/') - return syntax_error(value); - - /* fall-through */ - case '#': - while (next(true)) { - if (current() == '\n') - break; - } - - next(true); - consume(false); - continue; - - case '\'': - if (!next(true)) - return syntax_error(value); - - value->number = current(); - - if (!next(true) || current() != '\'') - return syntax_error(value); - - next(true); - - consume(false); - - return TOK_CHAR; - - case '"': - return lex_string(value); - - case '0' ... '9': - return lex_number(value); - - case 'a' ... 'z': - return lex_symbol(value, false); - - case 'A' ... 'Z': - return lex_symbol(value, true); - - default: - return syntax_error(value); - } - } - - if (ferror(file)) - return io_error(value); - - return 0; -} - -} |