444 lines
7.4 KiB
C++
444 lines
7.4 KiB
C++
/*
|
|
Copyright (c) 2013-2015, Matthias Schiffer <mschiffer@universe-factory.net>
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright notice,
|
|
this list of conditions and the following disclaimer.
|
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
this list of conditions and the following disclaimer in the documentation
|
|
and/or other materials provided with the distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
|
|
#include "lex.hpp"
|
|
|
|
|
|
namespace solar {
|
|
|
|
bool lex_t::advance() {
|
|
if (start > 0) {
|
|
std::memmove(buffer, buffer+start, end - start);
|
|
end -= start;
|
|
start = 0;
|
|
}
|
|
|
|
if (end == sizeof(buffer))
|
|
return false;
|
|
|
|
size_t l = std::fread(buffer+end, 1, sizeof(buffer) - end, file);
|
|
|
|
end += l;
|
|
return l;
|
|
}
|
|
|
|
bool lex_t::next(bool move) {
|
|
if (start + tok_len >= end)
|
|
return false;
|
|
|
|
if (current() == '\n') {
|
|
loc.last_column = 0;
|
|
loc.last_line++;
|
|
}
|
|
else {
|
|
loc.last_column++;
|
|
}
|
|
|
|
if (move)
|
|
start++;
|
|
else
|
|
tok_len++;
|
|
|
|
|
|
if (start + tok_len >= end)
|
|
return advance();
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
  Finishes the current token: the token's characters are skipped by moving
  `start` past them and the token length is reset to zero.

  consume_needspace is stored in `needspace`; lex_string() and lex_symbol()
  report a syntax error when they are entered while this flag is set.
*/
void lex_t::consume(bool consume_needspace) {
	needspace = consume_needspace;

	start += tok_len;
	tok_len = 0;
}
|
|
|
|
/**
  Reports an I/O error to the caller of the lexer.

  Always returns -1. Storing a message in the token value is currently
  disabled, so `value` is unused.
*/
int lex_t::io_error(parse_token_value_t *value) {
	// Disabled: value->error = "I/O error";
	static_cast<void>(value);

	return -1;
}
|
|
|
|
/**
  Reports a syntax error, unless the underlying file is in an error state, in
  which case the failure is reported through io_error() instead.

  Returns -1 on the syntax error path (storing a message in `value` is
  currently disabled), otherwise whatever io_error() returns.
*/
int lex_t::syntax_error(parse_token_value_t *value) {
	// Disabled on the syntax error path: value->error = "syntax error";
	return std::ferror(file) ? io_error(value) : -1;
}
|
|
|
|
/**
  Skips the remainder of a block comment.

  Characters are dropped one by one until a '/' that directly follows a '*'
  is found; the terminator is dropped as well.

  Returns 0 when the comment was terminated. If the input ends first, returns
  the result of io_error() when the file is in an error state and -1
  otherwise.
*/
int lex_t::consume_comment(parse_token_value_t *value) {
	// True when the previously inspected character was '*'
	bool after_star = false;

	while (next(true)) {
		const char c = current();

		if (after_star && c == '/') {
			next(true);
			consume(false);
			return 0;
		}

		after_star = (c == '*');
	}

	// Disabled when not an I/O error: value->error = "unterminated block comment";
	return std::ferror(file) ? io_error(value) : -1;
}
|
|
|
|
/**
  Reports that the input ended inside a string literal.

  If the file is in an error state the failure is reported through io_error();
  otherwise -1 is returned (storing a message in `value` is currently
  disabled).
*/
int lex_t::unterminated_string(parse_token_value_t *value) {
	// Disabled when not an I/O error: value->error = "unterminated string";
	return std::ferror(file) ? io_error(value) : -1;
}
|
|
|
|
/**
  Lexes a double-quoted string literal; called with the opening '"' as the
  current character.

  A backslash makes the following character part of the string verbatim
  (no translation of escape letters takes place), except that a backslash
  directly followed by a newline is dropped together with the newline.

  On success, value->str is set to a newly allocated std::string holding the
  contents without the surrounding quotes (the pointer is handed over to the
  caller) and TOK_STRING is returned; `needspace` is set for the following
  token.

  Returns the result of syntax_error() if `needspace` is set on entry, and the
  result of unterminated_string() if the input ends before the closing quote.
*/
int lex_t::lex_string(parse_token_value_t *value) {
	if (needspace)
		return syntax_error(value);

	std::string *buf = new std::string;

	while (true) {
		if (!next(true)) {
			delete buf;
			return unterminated_string(value);
		}

		char cur = current();

		if (cur == '"')
			break;

		if (cur == '\\') {
			if (!next(true)) {
				// buf comes from new, so it must be released with delete
				// (free() on it is undefined behaviour and skips the destructor)
				delete buf;
				return unterminated_string(value);
			}

			cur = current();

			// Backslash-newline: line continuation, contributes nothing
			if (cur == '\n')
				continue;
		}

		*buf += cur;
	}

	value->str = buf;

	// Drop the closing quote
	next(true);
	consume(true);

	return TOK_STRING;
}
|
|
|
|
/**
  Reports that the input ended inside a code block.

  If the file is in an error state the failure is reported through io_error();
  otherwise -1 is returned (storing a message in `value` is currently
  disabled).
*/
int lex_t::unterminated_block(parse_token_value_t *value) {
	// Disabled when not an I/O error: value->error = "unterminated code block";
	return std::ferror(file) ? io_error(value) : -1;
}
|
|
|
|
/**
  Lexes a delimited block of embedded code; called with the opening delimiter
  as the current character.

  The block's text is collected verbatim up to (not including) the matching
  `close` character. Nested open/close pairs are counted. Delimiters are
  ignored while inside a line comment, a block comment, a double-quoted string
  literal or a preprocessor line (from '#' to the first newline that is not
  preceded by a backslash).

  NOTE: single-quoted character literals are not tracked, so a delimiter or a
  double quote inside one (e.g. '"' or '}') is still interpreted.

  value  receives the collected text as a newly allocated std::string in
         value->str (the pointer is handed over to the caller)
  token  token id returned on success
  open   opening delimiter, increases the nesting depth
  close  closing delimiter, ends the block at nesting depth zero

  Returns `token` on success, or the result of unterminated_block() if the
  input ends before the block is closed. `needspace` is set for the following
  token.
*/
int lex_t::lex_block(parse_token_value_t *value, int token, char open, char close) {
	size_t parens = 0;
	bool line_comment = false;
	bool block_comment = false;
	bool str = false;
	bool escaped = false;
	bool pp = false;

	std::string *buf = new std::string;

	char prev = 0;

	while (true) {
		if (!next(true)) {
			delete buf;
			return unterminated_block(value);
		}

		char cur = current();

		if (line_comment) {
			if (cur == '\n')
				line_comment = false;
		}
		else if (block_comment) {
			if (prev == '*' && cur == '/')
				block_comment = false;
		}
		else if (str) {
			// Track escapes explicitly: looking only at the previous character
			// would treat the closing quote of "\\" as escaped and never leave
			// the string state.
			if (escaped)
				escaped = false;
			else if (cur == '\\')
				escaped = true;
			else if (cur == '"')
				str = false;
		}
		else if (pp) {
			if (prev != '\\' && cur == '\n')
				pp = false;
		}
		else {
			if (cur == open) {
				parens++;
			}
			else if (cur == close) {
				// Matching delimiter of the block itself
				if (!parens)
					break;

				parens--;
			}
			else if (cur == '"') {
				str = true;
			}
			else if (cur == '#') {
				pp = true;
			}
			else if (prev == '/' && cur == '/') {
				line_comment = true;
			}
			else if (prev == '/' && cur == '*') {
				block_comment = true;
			}
		}

		*buf += cur;
		prev = cur;
	}

	value->str = buf;

	// Drop the closing delimiter
	next(true);
	consume(true);

	return token;
}
|
|
|
|
/**
  Lexes a symbol: a run of ASCII letters, digits and underscores starting at
  the current character.

  The characters are accumulated into the current token (they are not dropped
  from the buffer) and the token text obtained from get_token() is stored in
  value->str.

  Returns TOK_SYMBOL_UC if the symbol contains at least one uppercase letter
  and no lowercase letter, TOK_SYMBOL otherwise. Returns the result of
  syntax_error() if `needspace` is set on entry.
*/
int lex_t::lex_symbol(parse_token_value_t *value) {
	if (needspace)
		return syntax_error(value);

	bool has_upper = false;
	bool has_lower = false;

	for (;;) {
		const char c = current();

		if (c >= 'A' && c <= 'Z')
			has_upper = true;
		else if (c >= 'a' && c <= 'z')
			has_lower = true;
		else if (c != '_' && (c < '0' || c > '9'))
			break;  // first character that cannot be part of a symbol

		// Add the character to the token; stop when the input is exhausted
		if (!next(false))
			break;
	}

	value->str = get_token();

	return (has_upper && !has_lower) ? TOK_SYMBOL_UC : TOK_SYMBOL;
}
|
|
|
|
/**
  Returns the next token from the input.

  Whitespace, line comments and block comments are skipped. Single-character
  punctuation is returned as its own character code; character literals yield
  TOK_CHAR with the decoded character in value->c; blocks, strings and symbols
  are delegated to lex_block(), lex_string() and lex_symbol().

  After a '%' token the lexer is in "dumb mode": as long as the next character
  is a letter, digit or underscore, each such character is returned as a
  separate single-character token. Any other character ends dumb mode.

  Returns 0 when the input is exhausted without a read error, the result of
  io_error() when the file is in an error state at that point, and the result
  of syntax_error() for input that cannot be tokenized.
*/
int lex_t::lex(parse_token_value_t *value) {
	while (start < end) {
		// A new token starts right behind the previous one
		loc.first_line = loc.last_line;
		loc.first_column = loc.last_column + 1;

		if (dumb_mode) {
			const char c = current();
			const bool word_char =
				(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
				(c >= '0' && c <= '9') || c == '_';

			if (word_char) {
				const int token = c;

				next(true);
				consume(false);
				return token;
			}
		}

		dumb_mode = false;

		const char head = current();

		// Skip whitespace
		if (head == ' ' || head == '\n' || head == '\t' || head == '\r') {
			next(true);
			consume(false);
			continue;
		}

		switch (head) {
		case '%':
			dumb_mode = true;
			/* fall through: '%' is returned like any other punctuation */

		case ';':
		case ':':
		case '|':
		case '=':
		case '(':
		case ')':
		case '.': {
			const int token = head;

			next(true);
			consume(false);
			return token;
		}

		case '/': {
			if (!next(true))
				return syntax_error(value);

			const char second = current();

			if (second == '*') {
				const int ret = consume_comment(value);
				if (ret)
					return ret;

				continue;
			}

			if (second != '/')
				return syntax_error(value);

			// Line comment: drop everything up to the end of the line
			while (next(true) && current() != '\n') {
			}

			next(true);
			consume(false);
			continue;
		}

		case '\'': {
			if (!next(true))
				return syntax_error(value);

			if (current() == '\\') {
				if (!next(true))
					return syntax_error(value);

				const char esc = current();

				switch (esc) {
				case 'a':
					value->c = '\a';
					break;

				case 'b':
					value->c = '\b';
					break;

				case 'f':
					value->c = '\f';
					break;

				case 'n':
					value->c = '\n';
					break;

				case 'r':
					value->c = '\r';
					break;

				case 't':
					value->c = '\t';
					break;

				case 'v':
					value->c = '\v';
					break;

				// Escapes that stand for the escaped character itself
				case '\\':
				case '\'':
				case '"':
				case '?':
					value->c = esc;
					break;

				default:
					return syntax_error(value);
				}
			}
			else {
				value->c = current();
			}

			// The literal must be closed right after its single character
			if (!next(true) || current() != '\'')
				return syntax_error(value);

			next(true);
			consume(false);

			return TOK_CHAR;
		}

		case '{':
			return lex_block(value, TOK_BLOCK, '{', '}');

		case '[':
			return lex_block(value, TOK_SQBLOCK, '[', ']');

		case '"':
			return lex_string(value);

		default:
			// Symbols start with a letter or an underscore
			if ((head >= 'a' && head <= 'z') || (head >= 'A' && head <= 'Z') || head == '_')
				return lex_symbol(value);

			return syntax_error(value);
		}
	}

	// Out of input: distinguish a read error from a regular end of file
	if (std::ferror(file))
		return io_error(value);

	return 0;
}
|
|
|
|
}
|