Some initial work

This commit is contained in:
Matthias Schiffer 2015-03-25 23:52:08 +01:00
commit 09972613c4
9 changed files with 866 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*~

11
CMakeLists.txt Normal file
View file

@ -0,0 +1,11 @@
# Build script for the "solar" executable.
cmake_minimum_required(VERSION 2.8.3)
# Only the C++ language is enabled for this project.
project(SOLAR CXX)
# Translation units linked into the executable.
add_executable(solar
lex.cpp
parser.cpp
solar.cpp
state.cpp
)
# Compile as C++11 with the common warning set enabled.
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")

372
lex.cpp Normal file
View file

@ -0,0 +1,372 @@
/*
Copyright (c) 2013-2014, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "lex.hpp"
#include <cstdlib>
/* Number of elements of a true array; must not be applied to a pointer */
#define array_size(array) (sizeof(array)/sizeof((array)[0]))
namespace solar {
/* Associates the spelling of a keyword with the token value lex_keyword() returns for it */
struct keyword_t {
/* keyword text; the key compare_keywords() orders entries by */
const char *keyword;
/* value returned by lex_keyword() when the keyword matches */
int token;
};
/*
The keyword list must be sorted by keyword text (std::strcmp order), because
lex_keyword() looks entries up with bsearch() using compare_keywords().
The list is currently empty, so that lookup never finds a match.
*/
static const keyword_t keywords[] = {
};
static int compare_keywords(const void *v1, const void *v2) {
const keyword_t *k1 = static_cast<const keyword_t*>(v1), *k2 = static_cast<const keyword_t*>(v2);
return std::strcmp(k1->keyword, k2->keyword);
}
/*
Refills the input buffer from the file.

Unread bytes are first moved to the front of the buffer so that the free
space is contiguous. Returns false if the buffer is already full or if
nothing could be read (end of file or read error), true otherwise.
*/
bool lex_t::advance() {
    if (start != 0) {
        const size_t pending = end - start;

        std::memmove(buffer, buffer + start, pending);
        end = pending;
        start = 0;
    }

    const size_t room = sizeof(buffer) - end;
    if (room == 0)
        return false;

    const size_t got = std::fread(buffer + end, 1, room, file);
    end += got;

    return got != 0;
}
/*
Steps past the current character and updates the end of the tracked location.

With move == true the character is dropped (start is advanced); otherwise it
is appended to the pending token (tok_len is increased). When the buffered
data is used up, more input is requested through advance().

Returns true if a current character is available afterwards.
*/
bool lex_t::next(bool move) {
    if (start + tok_len >= end)
        return false;

    const bool at_newline = (current() == '\n');
    if (at_newline) {
        loc.last_column = 0;
        loc.last_line++;
    } else {
        loc.last_column++;
    }

    if (move)
        start++;
    else
        tok_len++;

    /* advance() is only called when no buffered character is left */
    return (start + tok_len < end) || advance();
}
/*
Drops the pending token from the buffer and records whether the token that
follows has to be separated from it (checked by the lex_* functions).
*/
void lex_t::consume(bool consume_needspace) {
    needspace = consume_needspace;

    start += tok_len;
    tok_len = 0;
}
/* Stores the I/O error message in value->error and returns -1 */
int lex_t::io_error(value_t *value) {
value->error = "I/O error";
return -1;
}
/*
Reports a syntax error through value->error and returns -1.
If the stream's error indicator is set, an I/O error is reported instead.
*/
int lex_t::syntax_error(value_t *value) {
    if (!std::ferror(file)) {
        value->error = "syntax error";
        return -1;
    }

    return io_error(value);
}
/*
Skips a block comment. Called with the '*' of the opening "/ *" as current
character; scans until the closing "* /" pair has been read.

Returns 0 when the comment was closed, -1 (with value->error set) when the
input ended first or a read error occurred.
*/
int lex_t::consume_comment(value_t *value) {
    /* last holds the character seen in the previous iteration */
    for (char last = 0; next(true); last = current()) {
        if (last != '*' || current() != '/')
            continue;

        next(true);
        consume(false);
        return 0;
    }

    if (std::ferror(file))
        return io_error(value);

    value->error = "unterminated block comment";
    return -1;
}
/*
Reports an unterminated string literal through value->error and returns -1.
If the stream's error indicator is set, an I/O error is reported instead.
*/
int lex_t::unterminated_string(value_t *value) {
    if (!std::ferror(file)) {
        value->error = "unterminated string";
        return -1;
    }

    return io_error(value);
}
/*
Lexes a double-quoted string literal; the opening quote is the current character.

A backslash makes the following character part of the string literally
(so \" embeds a quote), except that a backslash followed by a newline is
dropped entirely (line continuation).

On success value->str receives a heap-allocated copy of the contents (to be
released with free()) and TOK_STRING is returned. On failure -1 is returned
and value->error is set; this now includes running out of memory, which
previously led to a NULL pointer dereference.
*/
int lex_t::lex_string(value_t *value) {
    if (needspace)
        return syntax_error(value);

    size_t len = 1024;
    size_t pos = 0;

    char *buf = static_cast<char*>(std::malloc(len));
    if (!buf) {
        value->error = "out of memory";
        return -1;
    }

    while (true) {
        if (!next(true)) {
            std::free(buf);
            return unterminated_string(value);
        }

        char cur = current();

        if (cur == '"')
            break;

        if (cur == '\\') {
            if (!next(true)) {
                std::free(buf);
                return unterminated_string(value);
            }

            cur = current();

            /* backslash-newline: line continuation, contributes nothing */
            if (cur == '\n')
                continue;
        }

        if (pos >= len) {
            /* keep the old pointer until realloc() is known to have succeeded */
            char *grown = static_cast<char*>(std::realloc(buf, 2*len));
            if (!grown) {
                std::free(buf);
                value->error = "out of memory";
                return -1;
            }

            buf = grown;
            len *= 2;
        }

        buf[pos++] = cur;
    }

    value->str = strndup(buf, pos);
    std::free(buf);

    if (!value->str) {
        value->error = "out of memory";
        return -1;
    }

    /* step over the closing quote */
    next(true);
    consume(true);

    return TOK_STRING;
}
/*
Lexes an unsigned integer; the first digit is the current character.

The token is extended over digits, 'x' and hexadecimal letters and then
converted with strtoull() using base 0 (decimal, octal with leading 0, or
hexadecimal with leading 0x). The result is stored in value->number.

Returns TOK_UINT, or -1 (with value->error set) if the token is not
completely a valid number or directly follows another token.
*/
int lex_t::lex_number(value_t *value) {
    if (needspace)
        return syntax_error(value);

    for (;;) {
        if (!next(false))
            break;

        const char c = current();
        const bool digit = (c >= '0' && c <= '9');
        const bool hexish = (c == 'x') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');

        if (!digit && !hexish)
            break;
    }

    char *text = get_token();
    char *rest;

    value->number = std::strtoull(text, &rest, 0);

    /* the conversion must have used up the whole token */
    const bool complete = (*rest == '\0');
    std::free(text);

    if (complete) {
        consume(true);
        return TOK_UINT;
    }

    return syntax_error(value);
}
/*
Lexes a keyword made of lowercase letters, digits and '-' and looks it up
in the sorted keywords table.

Returns the token value of the matching table entry, or -1 (with
value->error set) if the word is unknown or directly follows another token.
*/
int lex_t::lex_keyword(value_t *value) {
    if (needspace)
        return syntax_error(value);

    while (next(false)) {
        const char c = current();
        const bool lower = (c >= 'a' && c <= 'z');
        const bool digit = (c >= '0' && c <= '9');

        if (lower || digit || c == '-')
            continue;

        break;
    }

    char *text = get_token();

    /* only the keyword member takes part in the comparison */
    keyword_t needle;
    needle.keyword = text;
    needle.token = 0;

    const void *hit = std::bsearch(&needle, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords);
    std::free(text);

    if (hit == NULL)
        return syntax_error(value);

    consume(true);

    return static_cast<const keyword_t*>(hit)->token;
}
/*
Lexes a grammar symbol; its first character is the current character.

A terminal (terminal == true) consists of uppercase letters, digits and '_';
a nonterminal of lowercase letters, digits and '_'.

On success value->str receives a heap-allocated copy of the symbol name (to
be released with free()) and TOK_TERM or TOK_NONTERM is returned. Returns -1
(with value->error set) if the symbol directly follows another token.
*/
int lex_t::lex_symbol(value_t *value, bool terminal) {
    if (needspace)
        return syntax_error(value);

    while (next(false)) {
        const char cur = current();
        const bool upper = (cur >= 'A' && cur <= 'Z');
        const bool lower = (cur >= 'a' && cur <= 'z');
        const bool common = (cur >= '0' && cur <= '9') || cur == '_';

        if (common || (terminal ? upper : lower))
            continue;

        break;
    }

    /* the token has to be copied before consume() resets tok_len */
    value->str = get_token();

    /*
    Consume the token like the other lex_* functions do. Without this,
    tok_len stayed set and needspace stayed clear, so a directly following
    symbol (e.g. "ABCdef") was returned with this token's text prepended.
    */
    consume(true);

    return terminal ? TOK_TERM : TOK_NONTERM;
}
/* Creates a lexer that reads from file0 and fills the input buffer for the first time */
lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
advance();
}
/*
Returns the next token of the input.

Return value:
- the character itself for the punctuation tokens ; : { } | =
- TOK_CHAR for a quoted character ('x'), with the character in value->number
- TOK_STRING, TOK_UINT, TOK_TERM or TOK_NONTERM as produced by the lex_* helpers
- 0 when the input is exhausted
- -1 on error, with a message in value->error

Whitespace, block comments and line comments introduced by // or # are skipped.
*/
int lex_t::lex(value_t *value) {
int token;
while (end > start) {
/* the token about to be read starts right after the last consumed character */
loc.first_line = loc.last_line;
loc.first_column = loc.last_column+1;
switch (current()) {
/* whitespace separates tokens and clears the needspace requirement */
case ' ':
case '\n':
case '\t':
case '\r':
next(true);
consume(false);
continue;
/* single-character tokens are returned as their character value */
case ';':
case ':':
case '{':
case '}':
case '|':
case '=':
token = current();
next(true);
consume(false);
return token;
/* a slash must start either a block comment or a // line comment */
case '/':
if (!next(true))
return syntax_error(value);
if (current() == '*') {
token = consume_comment(value);
if (token)
return token;
continue;
}
if (current() != '/')
return syntax_error(value);
/* fall-through */
/* line comment: skip up to and including the end of the line */
case '#':
while (next(true)) {
if (current() == '\n')
break;
}
next(true);
consume(false);
continue;
/* character literal: exactly one character between single quotes, no escapes */
case '\'':
if (!next(true))
return syntax_error(value);
value->number = current();
if (!next(true) || current() != '\'')
return syntax_error(value);
next(true);
consume(false);
return TOK_CHAR;
case '"':
return lex_string(value);
case '0' ... '9':
return lex_number(value);
/* a symbol starting with a lowercase letter is a nonterminal ... */
case 'a' ... 'z':
return lex_symbol(value, false);
/* ... and one starting with an uppercase letter is a terminal */
case 'A' ... 'Z':
return lex_symbol(value, true);
default:
return syntax_error(value);
}
}
/* no buffered input left: distinguish a read error from the regular end of input */
if (ferror(file))
return io_error(value);
return 0;
}
}

96
lex.hpp Normal file
View file

@ -0,0 +1,96 @@
/*
Copyright (c) 2013-2014, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "parser.hpp"
#include <cstdio>
#include <cstring>
namespace solar {
/*
Source range of a token.

lex_t::lex() sets the first_* members at the start of each token and
lex_t::next() advances the last_* members for every character it steps over.
*/
struct location_t {
public:
int first_line;
int first_column;
int last_line;
int last_column;
/* line numbers start at 1, column counters at 0 */
location_t() : first_line(1), first_column(0), last_line(1), last_column(0) {}
};
/*
Lexer reading tokens from a stdio stream through a fixed-size buffer.
*/
class lex_t {
private:
/* location of the token currently being read */
location_t loc;
/* input stream, as passed to the constructor */
std::FILE *file;
/* set by consume(); while set, the lex_* functions reject a directly following token */
bool needspace;
/* index of the first unconsumed byte in buffer */
size_t start;
/* index one past the last valid byte in buffer */
size_t end;
/* length of the pending token, which begins at start */
size_t tok_len;
char buffer[1024];
bool advance();
bool next(bool move);
void consume(bool needspace);
int io_error(value_t *value);
int syntax_error(value_t *value);
int consume_comment(value_t *value);
int unterminated_string(value_t *value);
int lex_string(value_t *value);
/* NOTE(review): no definition of lex_address() or lex_float() is visible alongside lex.cpp - confirm they are needed */
int lex_address(value_t *value);
int lex_float(value_t *value);
int lex_number(value_t *value);
int lex_keyword(value_t *value);
int lex_symbol(value_t *value, bool terminal);
/* character following the pending token; does not check that it lies before end */
char current() {
return buffer[start + tok_len];
}
/* heap-allocated, NUL-terminated copy of the pending token (strndup) */
char * get_token() {
return strndup(buffer+start, tok_len);
}
public:
lex_t(FILE *file);
int lex(value_t *value);
const location_t & get_location() const {
return loc;
}
};
}

114
parser.cpp Normal file
View file

@ -0,0 +1,114 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "parser.hpp"
#include <cstdio>
#include <cstdlib>
namespace solar {
/* States of the grammar parser implemented by parser_parse() */
enum parser_state {
/* between rules: a nonterminal opens a rule, token 0 ends parsing */
STATE_INIT,
/* after the rule's nonterminal: '|' is expected */
STATE_RULE_BAR,
/* after '|': '=' is expected */
STATE_RULE_EQUAL,
/* inside the right-hand side: symbols are collected until ';' or TOK_BLOCK */
STATE_RULE,
};
/* Parser instance; the only thing it keeps is the current state of the state machine */
struct parser {
parser_state state;
};
/*
Allocates a parser in its initial state (STATE_INIT).

Returns a null pointer if the allocation fails; previously the failed
allocation was dereferenced. The parser has to be released with parser_free().
*/
parser_t * parser_alloc(void) {
    parser_t *parser = static_cast<parser_t *>(std::malloc(sizeof(parser_t)));
    if (!parser)
        return nullptr;

    parser->state = STATE_INIT;

    return parser;
}
/*
Feeds one token into the parser and forwards the recognised parts of a rule
to state.

Returns 1 if the token was accepted and more input is expected, 0 if the
end of input (token 0) was reached between two rules, and -1 if the token
is not allowed in the current parser state.
*/
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) {
    const parser_state current = parser->state;

    if (current == STATE_INIT) {
        if (token == TOK_NONTERM) {
            parser->state = STATE_RULE_BAR;
            state->openRule(value->str);
            return 1;
        }

        if (token == 0)
            return 0;

        return -1;
    }

    if (current == STATE_RULE_BAR) {
        /* the state is switched before the token is checked */
        parser->state = STATE_RULE_EQUAL;

        return (token == '|') ? 1 : -1;
    }

    if (current == STATE_RULE_EQUAL) {
        if (token != '=')
            return -1;

        parser->state = STATE_RULE;
        return 1;
    }

    if (current == STATE_RULE) {
        if (token == TOK_NONTERM) {
            state->addRuleNonterminal(value->str);
            return 1;
        }

        if (token == TOK_TERM) {
            state->addRuleTerminal(value->str);
            return 1;
        }

        if (token == TOK_CHAR) {
            state->addRuleTerminal(value->number);
            return 1;
        }

        if (token == TOK_BLOCK || token == ';') {
            state->closeRule();
            parser->state = STATE_INIT;
            return 1;
        }
    }

    return -1;
}
/* Releases a parser obtained from parser_alloc() */
void parser_free(parser_t *parser) {
std::free(parser);
}
}

59
parser.hpp Normal file
View file

@ -0,0 +1,59 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "state.hpp"
#include <cstdint>
namespace solar {
/*
Token values for tokens that are not a single character.

Single-character tokens are passed around as their character value, 0 stands
for the end of input and negative values for errors.
*/
enum token_t {
/* terminal symbol, name in value_t::str */
TOK_TERM = 1,
/* nonterminal symbol, name in value_t::str */
TOK_NONTERM,
/* accepted by parser_parse() as the end of a rule, like ';' */
TOK_BLOCK,
/* quoted character, stored in value_t::number */
TOK_CHAR,
/* string literal, contents in value_t::str */
TOK_STRING,
/* unsigned integer, stored in value_t::number */
TOK_UINT,
};
/* Data attached to a token; which member is meaningful depends on the token */
typedef struct value {
/* text of TOK_TERM, TOK_NONTERM and TOK_STRING tokens; allocated by the lexer with strndup() */
char *str;
/* value of TOK_UINT and TOK_CHAR tokens */
uint64_t number;
/* message set by the lexer when it returns an error */
const char *error;
} value_t;
/* Opaque parser handle; the structure is defined in parser.cpp */
typedef struct parser parser_t;
/* Allocates a new parser; release it with parser_free() */
parser_t * parser_alloc(void);
/* Feeds one token to the parser: returns > 0 to continue, 0 when done, < 0 on a parse error */
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state);
/* Releases a parser allocated by parser_alloc() */
void parser_free(parser_t *parser);
}

80
solar.cpp Normal file
View file

@ -0,0 +1,80 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstdio>
#include <cstdlib>
#include "lex.hpp"
#include "parser.hpp"
namespace solar {
bool readGrammar(const char *filename, state_t *state) {
FILE *file = fopen(filename, "r");
if (!file) {
std::fprintf(stderr, "unable to open file %s\n", filename);
return false;
}
lex_t lexer(file);
parser_t *parser = parser_alloc();
int ret;
do {
int token;
value_t value;
token = lexer.lex(&value);
if (token < 0) {
std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename,
lexer.get_location().first_line, lexer.get_location().first_column);
return false;
}
ret = parser_parse(parser, token, &value, state);
} while (ret > 0);
if (ret < 0) {
std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename,
lexer.get_location().first_line, lexer.get_location().first_column);
return false;
}
return true;
}
}
int main() {
using namespace solar;
state_t state;
if (!readGrammar("grammar.y", &state))
return 1;
return 0;
}

54
state.cpp Normal file
View file

@ -0,0 +1,54 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "state.hpp"
namespace solar {
/* Starts a new rule for nonterm with an empty right-hand side */
void state_t::openRule(const std::string &nonterm) {
current_nonterm = nonterm;
current_rule = rhs_t();
}
/* Appends the nonterminal named term to the right-hand side of the current rule */
void state_t::addRuleNonterminal(const std::string &term) {
current_rule.emplace_back(nonterm_sym_t(term));
}
/* Appends the named terminal term to the current rule and records its name in the set of terminals */
void state_t::addRuleTerminal(const std::string &term) {
terminals.emplace(term);
current_rule.emplace_back(term_sym_t(term));
}
/* Appends the character terminal term to the current rule; it is not added to the set of named terminals */
void state_t::addRuleTerminal(unsigned char term) {
current_rule.emplace_back(char_sym_t(term));
}
/*
Stores the current rule under its nonterminal. Both members are moved from
and are only assigned again by the next openRule() call.
*/
void state_t::closeRule() {
rules.emplace(std::move(current_nonterm), std::move(current_rule));
}
}

79
state.hpp Normal file
View file

@ -0,0 +1,79 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <unordered_set>
#include <unordered_map>
#include <utility>
#include <vector>
namespace solar {
/*
Collects the grammar while it is being parsed: the set of named terminals
and, for every nonterminal, the right-hand sides of its rules.

A rule is built with openRule(), any number of addRule*() calls and a final
closeRule().
*/
class state_t {
private:
    /*
    A symbol on the right-hand side of a rule.

    Symbols are stored by value in rhs_t, so all data has to live in this
    type: the derived types below only select the kind. (Previously each
    derived type carried its own value member, which was sliced off when the
    symbol was added to the vector.)
    */
    struct sym_t {
        enum kind_t {
            SYM_NONTERM,
            SYM_TERM,
            SYM_CHAR,
        };

        kind_t kind;

        /* symbol name; for SYM_CHAR a string holding the single character */
        std::string value;

        sym_t(kind_t kind0, const std::string &value0) : kind(kind0), value(value0) {}
    };

    /* nonterminal symbol, identified by name */
    struct nonterm_sym_t : public sym_t {
        nonterm_sym_t(const std::string &value0) : sym_t(SYM_NONTERM, value0) {}
    };

    /* named terminal symbol */
    struct term_sym_t : public sym_t {
        term_sym_t(const std::string &value0) : sym_t(SYM_TERM, value0) {}
    };

    /* terminal symbol standing for a single character */
    struct char_sym_t : public sym_t {
        char_sym_t(unsigned char value0) : sym_t(SYM_CHAR, std::string(1, static_cast<char>(value0))) {}
    };

    /* right-hand side of a rule */
    typedef std::vector<sym_t> rhs_t;

    /* names of all terminals added with addRuleTerminal(const std::string &) */
    std::unordered_set<std::string> terminals;

    /* finished rules by nonterminal; a nonterminal may have several rules */
    std::unordered_multimap<std::string, rhs_t> rules;

    /* rule under construction between openRule() and closeRule() */
    std::string current_nonterm;
    rhs_t current_rule;

public:
    void openRule(const std::string &nonterm);
    void addRuleTerminal(const std::string &term);
    void addRuleTerminal(unsigned char term);
    void addRuleNonterminal(const std::string &nonterm);
    void closeRule();
};
}