diff options
-rw-r--r-- | CMakeLists.txt | 9 | ||||
-rw-r--r-- | src/CMakeLists.txt | 7 | ||||
-rw-r--r-- | src/item.hpp (renamed from state.hpp) | 69 | ||||
-rw-r--r-- | src/lex.cpp (renamed from lex.cpp) | 17 | ||||
-rw-r--r-- | src/lex.hpp (renamed from lex.hpp) | 12 | ||||
-rw-r--r-- | src/parser.cpp (renamed from parser.cpp) | 27 | ||||
-rw-r--r-- | src/parser.hpp (renamed from parser.hpp) | 3 | ||||
-rw-r--r-- | src/solar.cpp (renamed from solar.cpp) | 21 | ||||
-rw-r--r-- | src/state.cpp | 67 | ||||
-rw-r--r-- | src/state.hpp (renamed from state.cpp) | 40 | ||||
-rw-r--r-- | src/symbol.hpp | 71 |
11 files changed, 251 insertions, 92 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e04b01..4a2daeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,4 @@ cmake_minimum_required(VERSION 2.8.3) project(SOLAR CXX) - -add_executable(solar - lex.cpp - parser.cpp - solar.cpp - state.cpp -) -set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall") +add_subdirectory(src) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..bb5e874 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(solar + lex.cpp + parser.cpp + solar.cpp + state.cpp +) +set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall") @@ -26,54 +26,55 @@ #pragma once -#include <string> -#include <unordered_set> -#include <unordered_map> -#include <utility> +#include "symbol.hpp" + #include <vector> namespace solar { -class state_t { -private: - struct sym_t { - virtual ~sym_t() {} - }; - - struct nonterm_sym_t : public sym_t { - std::string value; - - nonterm_sym_t(const std::string &value0) : value(value0) {} - }; +struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> { + item_t(const std::string &lhs) + : std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {} - struct term_sym_t : public sym_t { - std::string value; + const std::string & get_lhs() const { + return std::get<0>(*this); + } - term_sym_t(const std::string &value0) : value(value0) {} - }; + std::string & get_lhs() { + return std::get<0>(*this); + } - struct char_sym_t : public sym_t { - unsigned char value; + const std::vector<symbol_t> & get_rhs() const { + return std::get<1>(*this); + } - char_sym_t(unsigned char value0) : value(value0) {} - }; + std::vector<symbol_t> & get_rhs() { + return std::get<1>(*this); + } + unsigned get_point() const { + return std::get<2>(*this); + } - typedef std::vector<sym_t> rhs_t; + unsigned & get_point() { + return std::get<2>(*this); + } - std::unordered_set<std::string> terminals; - std::unordered_multimap<std::string, rhs_t> rules; + bool can_shift() const { + return get_point() < get_rhs().size(); + } - std::string current_nonterm; - rhs_t current_rule; + void shift() { + get_point()++; + } -public: - void openRule(const std::string &nonterm); - void addRuleTerminal(const std::string &term); - void addRuleTerminal(unsigned char term); - void addRuleNonterminal(const std::string &nonterm); - void closeRule(); + symbol_t get_next_symbol() const { + if (can_shift()) + return get_rhs()[get_point()]; + else + return symbol_t::make_end(); + } }; } @@ -130,6 +130,7 @@ int lex_t::consume_comment(value_t *value) { return -1; } +/* int lex_t::unterminated_string(value_t *value) { if (ferror(file)) return io_error(value); @@ -186,7 +187,7 @@ int lex_t::lex_string(value_t *value) { consume(true); return TOK_STRING; -} + }*/ int lex_t::lex_number(value_t *value) { if (needspace) @@ -230,7 +231,7 @@ int lex_t::lex_keyword(value_t *value) { } char *token = get_token(); - const keyword_t key = { .keyword = token }; + const keyword_t key = { .keyword = token, .token = 0 }; const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords)); free(token); @@ -274,10 +275,6 @@ int lex_t::lex_symbol(value_t *value, bool terminal) { return terminal ? TOK_TERM : TOK_NONTERM; } -lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) { - advance(); -} - int lex_t::lex(value_t *value) { int token; @@ -320,8 +317,8 @@ int lex_t::lex(value_t *value) { if (current() != '/') return syntax_error(value); - /* fall-through */ - case '#': + ///* fall-through */ + //case '#': while (next(true)) { if (current() == '\n') break; @@ -346,8 +343,8 @@ int lex_t::lex(value_t *value) { return TOK_CHAR; - case '"': - return lex_string(value); + //case '"': + //return lex_string(value); case '0' ... '9': return lex_number(value); @@ -55,7 +55,7 @@ private: size_t start; size_t end; size_t tok_len; - char buffer[1024]; + char buffer[65556]; bool advance(); @@ -65,7 +65,7 @@ private: int io_error(value_t *value); int syntax_error(value_t *value); int consume_comment(value_t *value); - int unterminated_string(value_t *value); + //int unterminated_string(value_t *value); int lex_string(value_t *value); int lex_address(value_t *value); @@ -84,7 +84,13 @@ private: public: - lex_t(FILE *file); + lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) { + advance(); + } + + ~lex_t() { + fclose(file); + } int lex(value_t *value); diff --git a/parser.cpp b/src/parser.cpp index 637f945..bd4dfbf 100644 --- a/parser.cpp +++ b/src/parser.cpp @@ -26,7 +26,6 @@ #include "parser.hpp" -#include <cstdio> #include <cstdlib> @@ -50,13 +49,14 @@ parser_t * parser_alloc(void) { return parser; } -int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) { +int parser_push(parser_t *parser, int token, const value_t *value, state_t *state) { switch (parser->state) { case STATE_INIT: switch (token) { case TOK_NONTERM: parser->state = STATE_RULE_BAR; - state->openRule(value->str); + state->new_rule(value->str); + free(value->str); return 1; case 0: @@ -66,9 +66,10 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta break; case STATE_RULE_BAR: + if (token == '|') { parser->state = STATE_RULE_EQUAL; - if (token == '|') return 1; + } break; @@ -83,20 +84,22 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta case STATE_RULE: switch (token) { case TOK_NONTERM: - state->addRuleNonterminal(value->str); + state->add_rule_nonterminal(value->str); + free(value->str); return 1; case TOK_TERM: - state->addRuleTerminal(value->str); + state->add_rule_terminal(value->str); + free(value->str); return 1; case TOK_CHAR: - state->addRuleTerminal(value->number); + state->add_rule_terminal(value->number); return 1; case TOK_BLOCK: case ';': - state->closeRule(); + state->add_rule(); parser->state = STATE_INIT; return 1; } @@ -104,6 +107,14 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta break; } + switch (token) { + case TOK_NONTERM: + case TOK_TERM: + case TOK_CHAR: + case TOK_BLOCK: + free(value->str);; + } + return -1; } diff --git a/parser.hpp b/src/parser.hpp index bd187e7..c5a1e48 100644 --- a/parser.hpp +++ b/src/parser.hpp @@ -38,7 +38,6 @@ enum token_t { TOK_NONTERM, TOK_BLOCK, TOK_CHAR, - TOK_STRING, TOK_UINT, }; @@ -53,7 +52,7 @@ typedef struct parser parser_t; parser_t * parser_alloc(void); -int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state); +int parser_push(parser_t *parser, int token, const value_t *value, state_t *state); void parser_free(parser_t *parser); } diff --git a/solar.cpp b/src/solar.cpp index 1efff7a..a5085a1 100644 --- a/solar.cpp +++ b/src/solar.cpp @@ -24,22 +24,23 @@ */ -#include <cstdio> - #include "lex.hpp" #include "parser.hpp" +#include <cstdio> +#include <memory> + namespace solar { -bool readGrammar(const char *filename, state_t *state) { +bool read_grammar(const char *filename, state_t *state) { FILE *file = fopen(filename, "r"); if (!file) { std::fprintf(stderr, "unable to open file %s\n", filename); return false; } - lex_t lexer(file); + std::unique_ptr<lex_t> lexer(new lex_t(file)); parser_t *parser = parser_alloc(); int ret; @@ -47,22 +48,24 @@ bool readGrammar(const char *filename, state_t *state) { int token; value_t value; - token = lexer.lex(&value); + token = lexer->lex(&value); if (token < 0) { std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename, - lexer.get_location().first_line, lexer.get_location().first_column); + lexer->get_location().first_line, lexer->get_location().first_column); return false; } - ret = parser_parse(parser, token, &value, state); + ret = parser_push(parser, token, &value, state); } while (ret > 0); if (ret < 0) { std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename, - lexer.get_location().first_line, lexer.get_location().first_column); + lexer->get_location().first_line, lexer->get_location().first_column); return false; } + parser_free(parser); + return true; } @@ -73,7 +76,7 @@ int main() { using namespace solar; state_t state; - if (!readGrammar("grammar.y", &state)) + if (!read_grammar("grammar.y", &state)) return 1; return 0; diff --git a/src/state.cpp b/src/state.cpp new file mode 100644 index 0000000..e798dce --- /dev/null +++ b/src/state.cpp @@ -0,0 +1,67 @@ +/* + Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net> + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "state.hpp" + + +namespace solar { + +void state_t::new_rule(const char *nonterm) { + if (rules.empty()) { + // start rule + current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm)); + add_rule(); + } + + current = item_t(nonterm); + +} + +void state_t::add_rule_nonterminal(const char *nonterm) { + current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm)); +} + +void state_t::add_rule_terminal(const char *term) { + current.get_rhs().emplace_back(symbol_t::make_term(term)); +} + +void state_t::add_rule_terminal(unsigned char term) { + current.get_rhs().emplace_back(symbol_t::make_char(term)); +} + +void state_t::add_rule() { + rules.emplace(current.get_lhs(), current); + + while (true) { + items.emplace(current.get_next_symbol(), current); + if (!current.can_shift()) + break; + + current.shift(); + } +} + +} diff --git a/state.cpp b/src/state.hpp index 2b91740..8069ef0 100644 --- a/state.cpp +++ b/src/state.hpp @@ -24,31 +24,35 @@ */ -#include "state.hpp" +#pragma once + +#include "item.hpp" + +#include <map> +#include <utility> namespace solar { -void state_t::openRule(const std::string &nonterm) { - current_nonterm = nonterm; - current_rule = rhs_t(); -} +class state_t { +private: + std::multimap<std::string, item_t> rules; + std::multimap<symbol_t, item_t> items; -void state_t::addRuleNonterminal(const std::string &term) { - current_rule.emplace_back(nonterm_sym_t(term)); -} + item_t current; -void state_t::addRuleTerminal(const std::string &term) { - terminals.emplace(term); - current_rule.emplace_back(term_sym_t(term)); -} +public: + state_t() : current("") {} -void state_t::addRuleTerminal(unsigned char term) { - current_rule.emplace_back(char_sym_t(term)); -} + const std::multimap<std::string, item_t> & get_rules() const { + return rules; + } -void state_t::closeRule() { - rules.emplace(std::move(current_nonterm), std::move(current_rule)); -} + void new_rule(const char *nonterm); + void add_rule_nonterminal(const char *nonterm); + void add_rule_terminal(const char *term); + void add_rule_terminal(unsigned char term); + void add_rule(); +}; } diff --git a/src/symbol.hpp b/src/symbol.hpp new file mode 100644 index 0000000..31b006d --- /dev/null +++ b/src/symbol.hpp @@ -0,0 +1,71 @@ +/* + Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net> + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include <string> +#include <tuple> + + +namespace solar { + +enum symbol_type_t { + SYMBOL_TYPE_END, + SYMBOL_TYPE_NONTERM, + SYMBOL_TYPE_TERM, + SYMBOL_TYPE_CHAR, +}; + +struct symbol_t : public std::tuple<symbol_type_t, std::string> { + symbol_t(symbol_type_t type, const char *value) : std::tuple<symbol_type_t, std::string>(type, value) {} + + symbol_type_t get_type() const { + return std::get<0>(*this); + } + + const std::string & get_value() const { + return std::get<1>(*this); + } + + static symbol_t make_end() { + return symbol_t(SYMBOL_TYPE_END, ""); + } + + static symbol_t make_nonterm(const char *value) { + return symbol_t(SYMBOL_TYPE_NONTERM, value); + } + + static symbol_t make_term(const char *value) { + return symbol_t(SYMBOL_TYPE_TERM, value); + } + + static symbol_t make_char(unsigned char value) { + char v[2] = {char(value), 0}; + return symbol_t(SYMBOL_TYPE_CHAR, v); + } +}; + +} |