From 8de90acc6791a62d4b2e48e9bd05daa0d5cfe4b6 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 27 Mar 2015 03:38:01 +0100 Subject: Generate items from grammar --- CMakeLists.txt | 9 +- lex.cpp | 372 ----------------------------------------------------- lex.hpp | 96 -------------- parser.cpp | 114 ---------------- parser.hpp | 59 --------- solar.cpp | 80 ------------ src/CMakeLists.txt | 7 + src/item.hpp | 80 ++++++++++++ src/lex.cpp | 369 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lex.hpp | 102 +++++++++++++++ src/parser.cpp | 125 ++++++++++++++++++ src/parser.hpp | 58 +++++++++ src/solar.cpp | 83 ++++++++++++ src/state.cpp | 67 ++++++++++ src/state.hpp | 58 +++++++++ src/symbol.hpp | 71 ++++++++++ state.cpp | 54 -------- state.hpp | 79 ------------ 18 files changed, 1021 insertions(+), 862 deletions(-) delete mode 100644 lex.cpp delete mode 100644 lex.hpp delete mode 100644 parser.cpp delete mode 100644 parser.hpp delete mode 100644 solar.cpp create mode 100644 src/CMakeLists.txt create mode 100644 src/item.hpp create mode 100644 src/lex.cpp create mode 100644 src/lex.hpp create mode 100644 src/parser.cpp create mode 100644 src/parser.hpp create mode 100644 src/solar.cpp create mode 100644 src/state.cpp create mode 100644 src/state.hpp create mode 100644 src/symbol.hpp delete mode 100644 state.cpp delete mode 100644 state.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e04b01..4a2daeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,4 @@ cmake_minimum_required(VERSION 2.8.3) project(SOLAR CXX) - -add_executable(solar - lex.cpp - parser.cpp - solar.cpp - state.cpp -) -set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall") +add_subdirectory(src) diff --git a/lex.cpp b/lex.cpp deleted file mode 100644 index 59d17f2..0000000 --- a/lex.cpp +++ /dev/null @@ -1,372 +0,0 @@ -/* - Copyright (c) 2013-2014, Matthias Schiffer - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -#include "lex.hpp" - -#include - - -#define array_size(array) (sizeof(array)/sizeof((array)[0])) - - -namespace solar { - - -struct keyword_t { - const char *keyword; - int token; -}; - -/* the keyword list must be sorted */ -static const keyword_t keywords[] = { -}; - -static int compare_keywords(const void *v1, const void *v2) { - const keyword_t *k1 = static_cast(v1), *k2 = static_cast(v2); - return std::strcmp(k1->keyword, k2->keyword); -} - - -bool lex_t::advance() { - if (start > 0) { - std::memmove(buffer, buffer+start, end - start); - end -= start; - start = 0; - } - - if (end == sizeof(buffer)) - return false; - - size_t l = std::fread(buffer+end, 1, sizeof(buffer) - end, file); - - end += l; - return l; -} - -bool lex_t::next(bool move) { - if (start + tok_len >= end) - return false; - - if (current() == '\n') { - loc.last_column = 0; - loc.last_line++; - } - else { - loc.last_column++; - } - - if (move) - start++; - else - tok_len++; - - - if (start + tok_len >= end) - return advance(); - - return true; -} - -void lex_t::consume(bool consume_needspace) { - start += tok_len; - tok_len = 0; - - needspace = consume_needspace; -} - -int lex_t::io_error(value_t *value) { - value->error = "I/O error"; - return -1; -} - -int lex_t::syntax_error(value_t *value) { - if (std::ferror(file)) - return io_error(value); - - value->error = "syntax error"; - return -1; -} - -int lex_t::consume_comment(value_t *value) { - char prev = 0; - - while (next(true)) { - if (prev == '*' && current() == '/') { - next(true); - consume(false); - return 0; - } - - prev = current(); - } - - if (std::ferror(file)) - return io_error(value); - - value->error = "unterminated block comment"; - return -1; -} - -int lex_t::unterminated_string(value_t *value) { - if (ferror(file)) - return io_error(value); - - value->error = "unterminated string"; - return -1; -} - -int lex_t::lex_string(value_t *value) { - char *buf = NULL; - size_t len = 1024; - size_t pos = 0; - - if (needspace) - 
return syntax_error(value); - - buf = static_cast(std::malloc(len)); - - while (true) { - if (!next(true)) { - std::free(buf); - return unterminated_string(value); - } - - char cur = current(); - - if (cur == '"') - break; - - if (cur == '\\') { - if (!next(true)) { - free(buf); - return unterminated_string(value); - } - - cur = current(); - - if (cur == '\n') - continue; - } - - if (pos >= len) { - len *= 2; - buf = static_cast(std::realloc(buf, len)); - } - - buf[pos++] = cur; - } - - value->str = strndup(buf, pos); - std::free(buf); - - next(true); - consume(true); - - return TOK_STRING; -} - -int lex_t::lex_number(value_t *value) { - if (needspace) - return syntax_error(value); - - while (next(false)) { - char cur = current(); - - if (cur >= '0' && cur <= '9') - continue; - - if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F')) - continue; - - break; - } - - char *endptr, *token = get_token(); - value->number = std::strtoull(token, &endptr, 0); - - bool ok = !*endptr; - free(token); - - if (!ok) - return syntax_error(value); - - consume(true); - - return TOK_UINT; -} - -int lex_t::lex_keyword(value_t *value) { - if (needspace) - return syntax_error(value); - - while (next(false)) { - char cur = current(); - - if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '-')) - break; - } - - char *token = get_token(); - const keyword_t key = { .keyword = token }; - const keyword_t *ret = static_cast(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords)); - free(token); - - if (!ret) - return syntax_error(value); - - consume(true); - - return ret->token; -} - -int lex_t::lex_symbol(value_t *value, bool terminal) { - if (needspace) - return syntax_error(value); - - while (next(false)) { - char cur = current(); - - switch (cur) { - case 'A' ... 'Z': - if (!terminal) - break; - - continue; - - case 'a' ... 'z': - if (terminal) - break; - - continue; - - case '0' ... 
'9': - case '_': - continue; - } - - break; - } - - value->str = get_token(); - return terminal ? TOK_TERM : TOK_NONTERM; -} - -lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) { - advance(); -} - -int lex_t::lex(value_t *value) { - int token; - - while (end > start) { - loc.first_line = loc.last_line; - loc.first_column = loc.last_column+1; - - switch (current()) { - case ' ': - case '\n': - case '\t': - case '\r': - next(true); - consume(false); - continue; - - case ';': - case ':': - case '{': - case '}': - case '|': - case '=': - token = current(); - next(true); - consume(false); - return token; - - case '/': - if (!next(true)) - return syntax_error(value); - - if (current() == '*') { - token = consume_comment(value); - if (token) - return token; - - continue; - } - - if (current() != '/') - return syntax_error(value); - - /* fall-through */ - case '#': - while (next(true)) { - if (current() == '\n') - break; - } - - next(true); - consume(false); - continue; - - case '\'': - if (!next(true)) - return syntax_error(value); - - value->number = current(); - - if (!next(true) || current() != '\'') - return syntax_error(value); - - next(true); - - consume(false); - - return TOK_CHAR; - - case '"': - return lex_string(value); - - case '0' ... '9': - return lex_number(value); - - case 'a' ... 'z': - return lex_symbol(value, false); - - case 'A' ... 'Z': - return lex_symbol(value, true); - - default: - return syntax_error(value); - } - } - - if (ferror(file)) - return io_error(value); - - return 0; -} - -} diff --git a/lex.hpp b/lex.hpp deleted file mode 100644 index ac2a276..0000000 --- a/lex.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - Copyright (c) 2013-2014, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -#pragma once - -#include "parser.hpp" - -#include -#include - - -namespace solar { - -struct location_t { -public: - int first_line; - int first_column; - int last_line; - int last_column; - - location_t() : first_line(1), first_column(0), last_line(1), last_column(0) {} -}; - -class lex_t { -private: - location_t loc; - - std::FILE *file; - - bool needspace; - - size_t start; - size_t end; - size_t tok_len; - char buffer[1024]; - - - bool advance(); - bool next(bool move); - void consume(bool needspace); - - int io_error(value_t *value); - int syntax_error(value_t *value); - int consume_comment(value_t *value); - int unterminated_string(value_t *value); - - int lex_string(value_t *value); - int lex_address(value_t *value); - int lex_float(value_t *value); - int lex_number(value_t *value); - int lex_keyword(value_t *value); - int lex_symbol(value_t *value, bool terminal); - - char current() { - return buffer[start + tok_len]; - } - - char * get_token() { - return strndup(buffer+start, tok_len); - } - - -public: - lex_t(FILE *file); - - int lex(value_t *value); - - const location_t & get_location() const { - return loc; - } -}; - -} diff --git a/parser.cpp b/parser.cpp deleted file mode 100644 index 637f945..0000000 --- a/parser.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - Copyright (c) 2015, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#include "parser.hpp" - -#include -#include - - -namespace solar { - -enum parser_state { - STATE_INIT, - STATE_RULE_BAR, - STATE_RULE_EQUAL, - STATE_RULE, -}; - -struct parser { - parser_state state; -}; - -parser_t * parser_alloc(void) { - parser_t *parser = (parser_t *)std::malloc(sizeof(parser_t)); - parser->state = STATE_INIT; - - return parser; -} - -int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) { - switch (parser->state) { - case STATE_INIT: - switch (token) { - case TOK_NONTERM: - parser->state = STATE_RULE_BAR; - state->openRule(value->str); - return 1; - - case 0: - return 0; - } - - break; - - case STATE_RULE_BAR: - parser->state = STATE_RULE_EQUAL; - if (token == '|') - return 1; - - break; - - case STATE_RULE_EQUAL: - if (token == '=') { - parser->state = STATE_RULE; - return 1; - } - - break; - - case STATE_RULE: - switch (token) { - case TOK_NONTERM: - state->addRuleNonterminal(value->str); - return 1; - - case TOK_TERM: - state->addRuleTerminal(value->str); - return 1; - - case TOK_CHAR: - state->addRuleTerminal(value->number); - return 1; - - case TOK_BLOCK: - case ';': - state->closeRule(); - parser->state = STATE_INIT; - return 1; - } - 
- break; - } - - return -1; -} - -void parser_free(parser_t *parser) { - std::free(parser); -} - -} diff --git a/parser.hpp b/parser.hpp deleted file mode 100644 index bd187e7..0000000 --- a/parser.hpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (c) 2015, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -#pragma once - -#include "state.hpp" - -#include - - -namespace solar { - -enum token_t { - TOK_TERM = 1, - TOK_NONTERM, - TOK_BLOCK, - TOK_CHAR, - TOK_STRING, - TOK_UINT, -}; - -typedef struct value { - char *str; - uint64_t number; - const char *error; -} value_t; - - -typedef struct parser parser_t; - - -parser_t * parser_alloc(void); -int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state); -void parser_free(parser_t *parser); - -} diff --git a/solar.cpp b/solar.cpp deleted file mode 100644 index 1efff7a..0000000 --- a/solar.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - Copyright (c) 2015, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -#include - -#include "lex.hpp" -#include "parser.hpp" - - -namespace solar { - -bool readGrammar(const char *filename, state_t *state) { - FILE *file = fopen(filename, "r"); - if (!file) { - std::fprintf(stderr, "unable to open file %s\n", filename); - return false; - } - - lex_t lexer(file); - parser_t *parser = parser_alloc(); - int ret; - - do { - int token; - value_t value; - - token = lexer.lex(&value); - if (token < 0) { - std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename, - lexer.get_location().first_line, lexer.get_location().first_column); - return false; - } - - ret = parser_parse(parser, token, &value, state); - } while (ret > 0); - - if (ret < 0) { - std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename, - lexer.get_location().first_line, lexer.get_location().first_column); - return false; - } - - return true; -} - -} - - -int main() { - using namespace solar; - - state_t state; - if (!readGrammar("grammar.y", &state)) - return 1; - - return 0; -} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..bb5e874 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(solar + lex.cpp + parser.cpp + solar.cpp + state.cpp +) +set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall") diff --git a/src/item.hpp b/src/item.hpp new file mode 100644 index 0000000..58987b8 --- /dev/null +++ b/src/item.hpp @@ -0,0 +1,80 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the documentation
+       and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#pragma once
+
+#include "symbol.hpp"
+
+#include <string>
+#include <tuple>
+#include <vector>
+
+
+namespace solar {
+
+/*
+  A grammar rule together with a position ("point") inside its right-hand side.
+
+  The item is stored as a tuple (lhs, rhs, point):
+    - lhs:   name of the rule's left-hand side
+    - rhs:   the symbols of the right-hand side, in order
+    - point: index into rhs of the next symbol to be shifted; a freshly
+             constructed item has an empty rhs and point 0
+*/
+struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
+	/* Creates an item for the rule named lhs with an empty right-hand side. */
+	item_t(const std::string &lhs)
+		: std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {}
+
+	/* Left-hand side of the rule. */
+	const std::string & get_lhs() const {
+		return std::get<0>(*this);
+	}
+
+	std::string & get_lhs() {
+		return std::get<0>(*this);
+	}
+
+	/* Right-hand side symbols of the rule. */
+	const std::vector<symbol_t> & get_rhs() const {
+		return std::get<1>(*this);
+	}
+
+	std::vector<symbol_t> & get_rhs() {
+		return std::get<1>(*this);
+	}
+
+	/* Index into the right-hand side of the next symbol to shift. */
+	unsigned get_point() const {
+		return std::get<2>(*this);
+	}
+
+	unsigned & get_point() {
+		return std::get<2>(*this);
+	}
+
+	/* True while the point has not yet reached the end of the right-hand side. */
+	bool can_shift() const {
+		return get_point() < get_rhs().size();
+	}
+
+	/* Moves the point one symbol to the right; does not check can_shift(). */
+	void shift() {
+		get_point()++;
+	}
+
+	/*
+	  Returns the symbol at the point, or symbol_t::make_end() when the point
+	  is already at the end of the right-hand side.
+	*/
+	symbol_t get_next_symbol() const {
+		if (can_shift())
+			return get_rhs()[get_point()];
+		else
+			return symbol_t::make_end();
+	}
+};
+
+}
diff --git a/src/lex.cpp b/src/lex.cpp
new file mode 100644
index 0000000..0587689
--- /dev/null
+++ b/src/lex.cpp
@@ -0,0 +1,369 @@
+/*
+ Copyright (c) 2013-2014, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+
+#include "lex.hpp"
+
+#include <cstdlib>
+
+
+#define array_size(array) (sizeof(array)/sizeof((array)[0]))
+
+
+namespace solar {
+
+
+/* Maps a keyword spelling to the token value lex_keyword() returns for it. */
+struct keyword_t {
+	const char *keyword;
+	int token;
+};
+
+/* the keyword list must be sorted */
+static const keyword_t keywords[] = {
+};
+
+/* bsearch() comparator: orders keyword_t entries by their spelling. */
+static int compare_keywords(const void *v1, const void *v2) {
+	const keyword_t *k1 = static_cast<const keyword_t *>(v1), *k2 = static_cast<const keyword_t *>(v2);
+	return std::strcmp(k1->keyword, k2->keyword);
+}
+
+
+/*
+  Moves the unread bytes [start, end) to the front of the buffer and reads
+  more data from the file behind them.
+
+  Returns false when the buffer is already full or when nothing could be read.
+*/
+bool lex_t::advance() {
+	if (start > 0) {
+		std::memmove(buffer, buffer+start, end - start);
+		end -= start;
+		start = 0;
+	}
+
+	if (end == sizeof(buffer))
+		return false;
+
+	size_t l = std::fread(buffer+end, 1, sizeof(buffer) - end, file);
+
+	end += l;
+	return l > 0;
+}
+
+/*
+  Steps the read cursor (start + tok_len) over the current character and
+  updates the tracked location.
+
+  With move set, the character is dropped (start advances); otherwise it is
+  added to the pending token (tok_len grows). Returns true when there is a
+  current character after the step, refilling the buffer if necessary.
+*/
+bool lex_t::next(bool move) {
+	if (start + tok_len >= end)
+		return false;
+
+	if (current() == '\n') {
+		loc.last_column = 0;
+		loc.last_line++;
+	}
+	else {
+		loc.last_column++;
+	}
+
+	if (move)
+		start++;
+	else
+		tok_len++;
+
+
+	if (start + tok_len >= end)
+		return advance();
+
+	return true;
+}
+
+/* Drops the pending token from the buffer and records the new needspace flag. */
+void lex_t::consume(bool consume_needspace) {
+	start += tok_len;
+	tok_len = 0;
+
+	needspace = consume_needspace;
+}
+
+/* Reports an I/O error through value->error; always returns -1. */
+int lex_t::io_error(value_t *value) {
+	value->error = "I/O error";
+	return -1;
+}
+
+/*
+  Reports a syntax error through value->error, or an I/O error if the file's
+  error indicator is set; always returns -1.
+*/
+int lex_t::syntax_error(value_t *value) {
+	if (std::ferror(file))
+		return io_error(value);
+
+	value->error = "syntax error";
+	return -1;
+}
+
+/*
+  Skips input up to and including the closing delimiter of a block comment.
+
+  Returns 0 when the comment was closed, -1 (with value->error set) when the
+  input ended first.
+*/
+int lex_t::consume_comment(value_t *value) {
+	char prev = 0;
+
+	while (next(true)) {
+		if (prev == '*' && current() == '/') {
+			next(true);
+			consume(false);
+			return 0;
+		}
+
+		prev = current();
+	}
+
+	if (std::ferror(file))
+		return io_error(value);
+
+	value->error = "unterminated block comment";
+	return -1;
+}
+
+/*
+int lex_t::unterminated_string(value_t *value) {
+	if (ferror(file))
+		return io_error(value);
+
+	value->error = "unterminated string";
+	return -1;
+}
+
+int lex_t::lex_string(value_t *value) {
+	char *buf = NULL;
+	size_t len = 1024;
+	size_t pos = 0;
+
+	if (needspace)
+		return syntax_error(value);
+
+	buf = static_cast<char *>(std::malloc(len));
+
+	while (true) {
+		if (!next(true)) {
+			std::free(buf);
+			return unterminated_string(value);
+		}
+
+		char cur = current();
+
+		if (cur == '"')
+			break;
+
+		if (cur == '\\') {
+			if (!next(true)) {
+				free(buf);
+				return unterminated_string(value);
+			}
+
+			cur = current();
+
+			if (cur == '\n')
+				continue;
+		}
+
+		if (pos >= len) {
+			len *= 2;
+			buf = static_cast<char *>(std::realloc(buf, len));
+		}
+
+		buf[pos++] = cur;
+	}
+
+	value->str = strndup(buf, pos);
+	std::free(buf);
+
+	next(true);
+	consume(true);
+
+	return TOK_STRING;
+ }*/
+
+/*
+  Lexes an unsigned integer and stores it in value->number.
+
+  The token is handed to strtoull() with base 0. Returns TOK_UINT, or -1 on
+  a syntax error.
+*/
+int lex_t::lex_number(value_t *value) {
+	if (needspace)
+		return syntax_error(value);
+
+	while (next(false)) {
+		char cur = current();
+
+		if (cur >= '0' && cur <= '9')
+			continue;
+
+		if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F'))
+			continue;
+
+		break;
+	}
+
+	char *endptr, *token = get_token();
+	value->number = std::strtoull(token, &endptr, 0);
+
+	/* the whole token must have been converted */
+	bool ok = !*endptr;
+	free(token);
+
+	if (!ok)
+		return syntax_error(value);
+
+	consume(true);
+
+	return TOK_UINT;
+}
+
+/*
+  Lexes a run of lowercase letters, digits and '-' and looks it up in the
+  keyword table. Returns the keyword's token, or -1 if it is not a keyword.
+*/
+int lex_t::lex_keyword(value_t *value) {
+	if (needspace)
+		return syntax_error(value);
+
+	while (next(false)) {
+		char cur = current();
+
+		if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '-'))
+			break;
+	}
+
+	char *token = get_token();
+	const keyword_t key = { .keyword = token, .token = 0 };
+	const keyword_t *ret = static_cast<const keyword_t *>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
+	free(token);
+
+	if (!ret)
+		return syntax_error(value);
+
+	consume(true);
+
+	return ret->token;
+}
+
+/*
+  Lexes a symbol name: uppercase letters for a terminal, lowercase letters
+  for a nonterminal; digits and '_' are accepted in both.
+
+  On success value->str holds a newly allocated copy of the name which the
+  caller must free(). Returns TOK_TERM or TOK_NONTERM, or -1 on a syntax error.
+*/
+int lex_t::lex_symbol(value_t *value, bool terminal) {
+	if (needspace)
+		return syntax_error(value);
+
+	while (next(false)) {
+		char cur = current();
+
+		switch (cur) {
+		case 'A' ... 'Z':
+			if (!terminal)
+				break;
+
+			continue;
+
+		case 'a' ... 'z':
+			if (terminal)
+				break;
+
+			continue;
+
+		case '0' ... '9':
+		case '_':
+			continue;
+		}
+
+		break;
+	}
+
+	value->str = get_token();
+
+	/*
+	  Drop the token from the buffer, as lex_number() and lex_keyword() do.
+	  Without this, tok_len stays set and a directly following symbol would
+	  be returned with this token's text prepended.
+	*/
+	consume(true);
+
+	return terminal ? TOK_TERM : TOK_NONTERM;
+}
+
+/*
+  Returns the next token of the input.
+
+  Whitespace and comments are skipped. The result is a token_t value or the
+  character code of a single-character token, 0 at the end of the input, or
+  -1 on error (value->error then describes the problem).
+*/
+int lex_t::lex(value_t *value) {
+	int token;
+
+	while (end > start) {
+		loc.first_line = loc.last_line;
+		loc.first_column = loc.last_column+1;
+
+		switch (current()) {
+		case ' ':
+		case '\n':
+		case '\t':
+		case '\r':
+			next(true);
+			consume(false);
+			continue;
+
+		case ';':
+		case ':':
+		case '{':
+		case '}':
+		case '|':
+		case '=':
+			token = current();
+			next(true);
+			consume(false);
+			return token;
+
+		case '/':
+			if (!next(true))
+				return syntax_error(value);
+
+			if (current() == '*') {
+				token = consume_comment(value);
+				if (token)
+					return token;
+
+				continue;
+			}
+
+			if (current() != '/')
+				return syntax_error(value);
+
+			///* fall-through */
+		//case '#':
+			/* line comment: skip to the end of the line */
+			while (next(true)) {
+				if (current() == '\n')
+					break;
+			}
+
+			next(true);
+			consume(false);
+			continue;
+
+		case '\'':
+			if (!next(true))
+				return syntax_error(value);
+
+			/* character literal: only value->number is set, value->str is not */
+			value->number = current();
+
+			if (!next(true) || current() != '\'')
+				return syntax_error(value);
+
+			next(true);
+
+			consume(false);
+
+			return TOK_CHAR;
+
+		//case '"':
+			//return lex_string(value);
+
+		case '0' ... '9':
+			return lex_number(value);
+
+		case 'a' ... 'z':
+			return lex_symbol(value, false);
+
+		case 'A' ... 'Z':
+			return lex_symbol(value, true);
+
+		default:
+			return syntax_error(value);
+		}
+	}
+
+	if (ferror(file))
+		return io_error(value);
+
+	return 0;
+}
+
+}
diff --git a/src/lex.hpp b/src/lex.hpp
new file mode 100644
index 0000000..8898d0b
--- /dev/null
+++ b/src/lex.hpp
@@ -0,0 +1,102 @@
+/*
+  Copyright (c) 2013-2014, Matthias Schiffer
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    2. 
Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the documentation
+       and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#pragma once
+
+#include "parser.hpp"
+
+#include <cstdio>
+#include <cstring>
+
+
+namespace solar {
+
+/*
+  Source range of the most recently lexed token. Lines start at 1; the
+  columns start at 0 and are advanced by the lexer as characters are read.
+*/
+struct location_t {
+public:
+	int first_line;
+	int first_column;
+	int last_line;
+	int last_column;
+
+	location_t() : first_line(1), first_column(0), last_line(1), last_column(0) {}
+};
+
+/*
+  Buffered lexer over a stdio stream.
+
+  The lexer takes ownership of the FILE passed to the constructor and closes
+  it in its destructor, so the object must not be copied.
+*/
+class lex_t {
+private:
+	location_t loc;
+
+	std::FILE *file;
+
+	/* set by consume(); number and symbol lexers reject input while it is set */
+	bool needspace;
+
+	/* buffer[start, end) is unread input; the pending token is buffer[start, start+tok_len) */
+	size_t start;
+	size_t end;
+	size_t tok_len;
+	/* NOTE(review): 65556 looks like a typo for 65536 - confirm before changing */
+	char buffer[65556];
+
+
+	bool advance();
+	bool next(bool move);
+	void consume(bool needspace);
+
+	int io_error(value_t *value);
+	int syntax_error(value_t *value);
+	int consume_comment(value_t *value);
+	//int unterminated_string(value_t *value);
+
+	int lex_string(value_t *value);
+	int lex_address(value_t *value);
+	int lex_float(value_t *value);
+	int lex_number(value_t *value);
+	int lex_keyword(value_t *value);
+	int lex_symbol(value_t *value, bool terminal);
+
+	/* Character at the read cursor; only valid while start + tok_len < end. */
+	char current() {
+		return buffer[start + tok_len];
+	}
+
+	/* Returns a newly allocated copy of the pending token; release with free(). */
+	char * get_token() {
+		return strndup(buffer+start, tok_len);
+	}
+
+
+public:
+	/* Takes ownership of file0 and reads the first chunk of input. */
+	lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
+		advance();
+	}
+
+	/* The destructor closes the file, so a copy would close it a second time. */
+	lex_t(const lex_t &) = delete;
+	lex_t & operator=(const lex_t &) = delete;
+
+	~lex_t() {
+		fclose(file);
+	}
+
+	/* Returns the next token; see the definition in lex.cpp. */
+	int lex(value_t *value);
+
+	/* Location of the most recently lexed token. */
+	const location_t & get_location() const {
+		return loc;
+	}
+};
+
+}
diff --git a/src/parser.cpp b/src/parser.cpp
new file mode 100644
index 0000000..bd4dfbf
--- /dev/null
+++ b/src/parser.cpp
@@ -0,0 +1,125 @@
+/*
+  Copyright (c) 2015, Matthias Schiffer
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the documentation
+       and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/ + + +#include "parser.hpp" + +#include + + +namespace solar { + +enum parser_state { + STATE_INIT, + STATE_RULE_BAR, + STATE_RULE_EQUAL, + STATE_RULE, +}; + +struct parser { + parser_state state; +}; + +parser_t * parser_alloc(void) { + parser_t *parser = (parser_t *)std::malloc(sizeof(parser_t)); + parser->state = STATE_INIT; + + return parser; +} + +int parser_push(parser_t *parser, int token, const value_t *value, state_t *state) { + switch (parser->state) { + case STATE_INIT: + switch (token) { + case TOK_NONTERM: + parser->state = STATE_RULE_BAR; + state->new_rule(value->str); + free(value->str); + return 1; + + case 0: + return 0; + } + + break; + + case STATE_RULE_BAR: + if (token == '|') { + parser->state = STATE_RULE_EQUAL; + return 1; + } + + break; + + case STATE_RULE_EQUAL: + if (token == '=') { + parser->state = STATE_RULE; + return 1; + } + + break; + + case STATE_RULE: + switch (token) { + case TOK_NONTERM: + state->add_rule_nonterminal(value->str); + free(value->str); + return 1; + + case TOK_TERM: + state->add_rule_terminal(value->str); + free(value->str); + return 1; + + case TOK_CHAR: + state->add_rule_terminal(value->number); + return 1; + + case TOK_BLOCK: + case ';': + state->add_rule(); + parser->state = STATE_INIT; + return 1; + } + + break; + } + + switch (token) { + case TOK_NONTERM: + case TOK_TERM: + case TOK_CHAR: + case TOK_BLOCK: + free(value->str);; + } + + return -1; +} + +void parser_free(parser_t *parser) { + std::free(parser); +} + +} diff --git a/src/parser.hpp b/src/parser.hpp new file mode 100644 index 0000000..c5a1e48 --- /dev/null +++ b/src/parser.hpp @@ -0,0 +1,58 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include "state.hpp" + +#include + + +namespace solar { + +enum token_t { + TOK_TERM = 1, + TOK_NONTERM, + TOK_BLOCK, + TOK_CHAR, + TOK_UINT, +}; + +typedef struct value { + char *str; + uint64_t number; + const char *error; +} value_t; + + +typedef struct parser parser_t; + + +parser_t * parser_alloc(void); +int parser_push(parser_t *parser, int token, const value_t *value, state_t *state); +void parser_free(parser_t *parser); + +} diff --git a/src/solar.cpp b/src/solar.cpp new file mode 100644 index 0000000..a5085a1 --- /dev/null +++ b/src/solar.cpp @@ -0,0 +1,83 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "lex.hpp" +#include "parser.hpp" + +#include +#include + + +namespace solar { + +bool read_grammar(const char *filename, state_t *state) { + FILE *file = fopen(filename, "r"); + if (!file) { + std::fprintf(stderr, "unable to open file %s\n", filename); + return false; + } + + std::unique_ptr lexer(new lex_t(file)); + parser_t *parser = parser_alloc(); + int ret; + + do { + int token; + value_t value; + + token = lexer->lex(&value); + if (token < 0) { + std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename, + lexer->get_location().first_line, lexer->get_location().first_column); + return false; + } + + ret = parser_push(parser, token, &value, state); + } while (ret > 0); + + if (ret < 0) { + std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename, + lexer->get_location().first_line, lexer->get_location().first_column); + return false; + } + + parser_free(parser); + + return true; +} + +} + + +int main() { + using namespace solar; + + state_t state; + if 
(!read_grammar("grammar.y", &state)) + return 1; + + return 0; +} diff --git a/src/state.cpp b/src/state.cpp new file mode 100644 index 0000000..e798dce --- /dev/null +++ b/src/state.cpp @@ -0,0 +1,67 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "state.hpp" + + +namespace solar { + +void state_t::new_rule(const char *nonterm) { + if (rules.empty()) { + // start rule + current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm)); + add_rule(); + } + + current = item_t(nonterm); + +} + +void state_t::add_rule_nonterminal(const char *nonterm) { + current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm)); +} + +void state_t::add_rule_terminal(const char *term) { + current.get_rhs().emplace_back(symbol_t::make_term(term)); +} + +void state_t::add_rule_terminal(unsigned char term) { + current.get_rhs().emplace_back(symbol_t::make_char(term)); +} + +void state_t::add_rule() { + rules.emplace(current.get_lhs(), current); + + while (true) { + items.emplace(current.get_next_symbol(), current); + if (!current.can_shift()) + break; + + current.shift(); + } +} + +} diff --git a/src/state.hpp b/src/state.hpp new file mode 100644 index 0000000..8069ef0 --- /dev/null +++ b/src/state.hpp @@ -0,0 +1,58 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include "item.hpp" + +#include +#include + + +namespace solar { + +class state_t { +private: + std::multimap rules; + std::multimap items; + + item_t current; + +public: + state_t() : current("") {} + + const std::multimap & get_rules() const { + return rules; + } + + void new_rule(const char *nonterm); + void add_rule_nonterminal(const char *nonterm); + void add_rule_terminal(const char *term); + void add_rule_terminal(unsigned char term); + void add_rule(); +}; + +} diff --git a/src/symbol.hpp b/src/symbol.hpp new file mode 100644 index 0000000..31b006d --- /dev/null +++ b/src/symbol.hpp @@ -0,0 +1,71 @@ +/* + Copyright (c) 2015, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include +#include + + +namespace solar { + +enum symbol_type_t { + SYMBOL_TYPE_END, + SYMBOL_TYPE_NONTERM, + SYMBOL_TYPE_TERM, + SYMBOL_TYPE_CHAR, +}; + +struct symbol_t : public std::tuple { + symbol_t(symbol_type_t type, const char *value) : std::tuple(type, value) {} + + symbol_type_t get_type() const { + return std::get<0>(*this); + } + + const std::string & get_value() const { + return std::get<1>(*this); + } + + static symbol_t make_end() { + return symbol_t(SYMBOL_TYPE_END, ""); + } + + static symbol_t make_nonterm(const char *value) { + return symbol_t(SYMBOL_TYPE_NONTERM, value); + } + + static symbol_t make_term(const char *value) { + return symbol_t(SYMBOL_TYPE_TERM, value); + } + + static symbol_t make_char(unsigned char value) { + char v[2] = {char(value), 0}; + return symbol_t(SYMBOL_TYPE_CHAR, v); + } +}; + +} diff --git a/state.cpp b/state.cpp deleted file mode 100644 index 2b91740..0000000 --- a/state.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - Copyright (c) 2015, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#include "state.hpp" - - -namespace solar { - -void state_t::openRule(const std::string &nonterm) { - current_nonterm = nonterm; - current_rule = rhs_t(); -} - -void state_t::addRuleNonterminal(const std::string &term) { - current_rule.emplace_back(nonterm_sym_t(term)); -} - -void state_t::addRuleTerminal(const std::string &term) { - terminals.emplace(term); - current_rule.emplace_back(term_sym_t(term)); -} - -void state_t::addRuleTerminal(unsigned char term) { - current_rule.emplace_back(char_sym_t(term)); -} - -void state_t::closeRule() { - rules.emplace(std::move(current_nonterm), std::move(current_rule)); -} - -} diff --git a/state.hpp b/state.hpp deleted file mode 100644 index fc3af67..0000000 --- a/state.hpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - Copyright (c) 2015, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -#pragma once - -#include -#include -#include -#include -#include - - -namespace solar { - -class state_t { -private: - struct sym_t { - virtual ~sym_t() {} - }; - - struct nonterm_sym_t : public sym_t { - std::string value; - - nonterm_sym_t(const std::string &value0) : value(value0) {} - }; - - struct term_sym_t : public sym_t { - std::string value; - - term_sym_t(const std::string &value0) : value(value0) {} - }; - - struct char_sym_t : public sym_t { - unsigned char value; - - char_sym_t(unsigned char value0) : value(value0) {} - }; - - - typedef std::vector rhs_t; - - std::unordered_set terminals; - std::unordered_multimap rules; - - std::string current_nonterm; - rhs_t current_rule; - -public: - void openRule(const std::string &nonterm); - void addRuleTerminal(const std::string &term); - void addRuleTerminal(unsigned char term); - void addRuleNonterminal(const std::string &nonterm); - void closeRule(); -}; - -} -- cgit v1.2.3