summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Schiffer <mschiffer@universe-factory.net>2015-03-27 03:38:01 +0100
committerMatthias Schiffer <mschiffer@universe-factory.net>2015-03-27 03:55:26 +0100
commit8de90acc6791a62d4b2e48e9bd05daa0d5cfe4b6 (patch)
tree40cea5bf09f881c2a54063721aa52b2db811bad7
parent09972613c44cf9968afc17d7294aa5e77fc29b9c (diff)
downloadsolar-8de90acc6791a62d4b2e48e9bd05daa0d5cfe4b6.tar
solar-8de90acc6791a62d4b2e48e9bd05daa0d5cfe4b6.zip
Generate items from grammar
-rw-r--r--CMakeLists.txt9
-rw-r--r--src/CMakeLists.txt7
-rw-r--r--src/item.hpp (renamed from state.hpp)69
-rw-r--r--src/lex.cpp (renamed from lex.cpp)17
-rw-r--r--src/lex.hpp (renamed from lex.hpp)12
-rw-r--r--src/parser.cpp (renamed from parser.cpp)27
-rw-r--r--src/parser.hpp (renamed from parser.hpp)3
-rw-r--r--src/solar.cpp (renamed from solar.cpp)21
-rw-r--r--src/state.cpp67
-rw-r--r--src/state.hpp (renamed from state.cpp)40
-rw-r--r--src/symbol.hpp71
11 files changed, 251 insertions, 92 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e04b01..4a2daeb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,11 +1,4 @@
cmake_minimum_required(VERSION 2.8.3)
project(SOLAR CXX)
-
-add_executable(solar
- lex.cpp
- parser.cpp
- solar.cpp
- state.cpp
-)
-set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")
+add_subdirectory(src)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..bb5e874
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_executable(solar
+ lex.cpp
+ parser.cpp
+ solar.cpp
+ state.cpp
+)
+set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")
diff --git a/state.hpp b/src/item.hpp
index fc3af67..58987b8 100644
--- a/state.hpp
+++ b/src/item.hpp
@@ -26,54 +26,55 @@
#pragma once
-#include <string>
-#include <unordered_set>
-#include <unordered_map>
-#include <utility>
+#include "symbol.hpp"
+
#include <vector>
namespace solar {
-class state_t {
-private:
- struct sym_t {
- virtual ~sym_t() {}
- };
-
- struct nonterm_sym_t : public sym_t {
- std::string value;
-
- nonterm_sym_t(const std::string &value0) : value(value0) {}
- };
+struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
+ item_t(const std::string &lhs)
+ : std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {}
- struct term_sym_t : public sym_t {
- std::string value;
+ const std::string & get_lhs() const {
+ return std::get<0>(*this);
+ }
- term_sym_t(const std::string &value0) : value(value0) {}
- };
+ std::string & get_lhs() {
+ return std::get<0>(*this);
+ }
- struct char_sym_t : public sym_t {
- unsigned char value;
+ const std::vector<symbol_t> & get_rhs() const {
+ return std::get<1>(*this);
+ }
- char_sym_t(unsigned char value0) : value(value0) {}
- };
+ std::vector<symbol_t> & get_rhs() {
+ return std::get<1>(*this);
+ }
+ unsigned get_point() const {
+ return std::get<2>(*this);
+ }
- typedef std::vector<sym_t> rhs_t;
+ unsigned & get_point() {
+ return std::get<2>(*this);
+ }
- std::unordered_set<std::string> terminals;
- std::unordered_multimap<std::string, rhs_t> rules;
+ bool can_shift() const {
+ return get_point() < get_rhs().size();
+ }
- std::string current_nonterm;
- rhs_t current_rule;
+ void shift() {
+ get_point()++;
+ }
-public:
- void openRule(const std::string &nonterm);
- void addRuleTerminal(const std::string &term);
- void addRuleTerminal(unsigned char term);
- void addRuleNonterminal(const std::string &nonterm);
- void closeRule();
+ symbol_t get_next_symbol() const {
+ if (can_shift())
+ return get_rhs()[get_point()];
+ else
+ return symbol_t::make_end();
+ }
};
}
diff --git a/lex.cpp b/src/lex.cpp
index 59d17f2..0587689 100644
--- a/lex.cpp
+++ b/src/lex.cpp
@@ -130,6 +130,7 @@ int lex_t::consume_comment(value_t *value) {
return -1;
}
+/*
int lex_t::unterminated_string(value_t *value) {
if (ferror(file))
return io_error(value);
@@ -186,7 +187,7 @@ int lex_t::lex_string(value_t *value) {
consume(true);
return TOK_STRING;
-}
+ }*/
int lex_t::lex_number(value_t *value) {
if (needspace)
@@ -230,7 +231,7 @@ int lex_t::lex_keyword(value_t *value) {
}
char *token = get_token();
- const keyword_t key = { .keyword = token };
+ const keyword_t key = { .keyword = token, .token = 0 };
const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
free(token);
@@ -274,10 +275,6 @@ int lex_t::lex_symbol(value_t *value, bool terminal) {
return terminal ? TOK_TERM : TOK_NONTERM;
}
-lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
- advance();
-}
-
int lex_t::lex(value_t *value) {
int token;
@@ -320,8 +317,8 @@ int lex_t::lex(value_t *value) {
if (current() != '/')
return syntax_error(value);
- /* fall-through */
- case '#':
+ ///* fall-through */
+ //case '#':
while (next(true)) {
if (current() == '\n')
break;
@@ -346,8 +343,8 @@ int lex_t::lex(value_t *value) {
return TOK_CHAR;
- case '"':
- return lex_string(value);
+ //case '"':
+ //return lex_string(value);
case '0' ... '9':
return lex_number(value);
diff --git a/lex.hpp b/src/lex.hpp
index ac2a276..8898d0b 100644
--- a/lex.hpp
+++ b/src/lex.hpp
@@ -55,7 +55,7 @@ private:
size_t start;
size_t end;
size_t tok_len;
- char buffer[1024];
+ char buffer[65556];
bool advance();
@@ -65,7 +65,7 @@ private:
int io_error(value_t *value);
int syntax_error(value_t *value);
int consume_comment(value_t *value);
- int unterminated_string(value_t *value);
+ //int unterminated_string(value_t *value);
int lex_string(value_t *value);
int lex_address(value_t *value);
@@ -84,7 +84,13 @@ private:
public:
- lex_t(FILE *file);
+ lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
+ advance();
+ }
+
+ ~lex_t() {
+ fclose(file);
+ }
int lex(value_t *value);
diff --git a/parser.cpp b/src/parser.cpp
index 637f945..bd4dfbf 100644
--- a/parser.cpp
+++ b/src/parser.cpp
@@ -26,7 +26,6 @@
#include "parser.hpp"
-#include <cstdio>
#include <cstdlib>
@@ -50,13 +49,14 @@ parser_t * parser_alloc(void) {
return parser;
}
-int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) {
+int parser_push(parser_t *parser, int token, const value_t *value, state_t *state) {
switch (parser->state) {
case STATE_INIT:
switch (token) {
case TOK_NONTERM:
parser->state = STATE_RULE_BAR;
- state->openRule(value->str);
+ state->new_rule(value->str);
+ free(value->str);
return 1;
case 0:
@@ -66,9 +66,10 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
break;
case STATE_RULE_BAR:
+ if (token == '|') {
parser->state = STATE_RULE_EQUAL;
- if (token == '|')
return 1;
+ }
break;
@@ -83,20 +84,22 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
case STATE_RULE:
switch (token) {
case TOK_NONTERM:
- state->addRuleNonterminal(value->str);
+ state->add_rule_nonterminal(value->str);
+ free(value->str);
return 1;
case TOK_TERM:
- state->addRuleTerminal(value->str);
+ state->add_rule_terminal(value->str);
+ free(value->str);
return 1;
case TOK_CHAR:
- state->addRuleTerminal(value->number);
+ state->add_rule_terminal(value->number);
return 1;
case TOK_BLOCK:
case ';':
- state->closeRule();
+ state->add_rule();
parser->state = STATE_INIT;
return 1;
}
@@ -104,6 +107,14 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
break;
}
+ switch (token) {
+ case TOK_NONTERM:
+ case TOK_TERM:
+ case TOK_CHAR:
+ case TOK_BLOCK:
+ free(value->str);;
+ }
+
return -1;
}
diff --git a/parser.hpp b/src/parser.hpp
index bd187e7..c5a1e48 100644
--- a/parser.hpp
+++ b/src/parser.hpp
@@ -38,7 +38,6 @@ enum token_t {
TOK_NONTERM,
TOK_BLOCK,
TOK_CHAR,
- TOK_STRING,
TOK_UINT,
};
@@ -53,7 +52,7 @@ typedef struct parser parser_t;
parser_t * parser_alloc(void);
-int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state);
+int parser_push(parser_t *parser, int token, const value_t *value, state_t *state);
void parser_free(parser_t *parser);
}
diff --git a/solar.cpp b/src/solar.cpp
index 1efff7a..a5085a1 100644
--- a/solar.cpp
+++ b/src/solar.cpp
@@ -24,22 +24,23 @@
*/
-#include <cstdio>
-
#include "lex.hpp"
#include "parser.hpp"
+#include <cstdio>
+#include <memory>
+
namespace solar {
-bool readGrammar(const char *filename, state_t *state) {
+bool read_grammar(const char *filename, state_t *state) {
FILE *file = fopen(filename, "r");
if (!file) {
std::fprintf(stderr, "unable to open file %s\n", filename);
return false;
}
- lex_t lexer(file);
+ std::unique_ptr<lex_t> lexer(new lex_t(file));
parser_t *parser = parser_alloc();
int ret;
@@ -47,22 +48,24 @@ bool readGrammar(const char *filename, state_t *state) {
int token;
value_t value;
- token = lexer.lex(&value);
+ token = lexer->lex(&value);
if (token < 0) {
std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename,
- lexer.get_location().first_line, lexer.get_location().first_column);
+ lexer->get_location().first_line, lexer->get_location().first_column);
return false;
}
- ret = parser_parse(parser, token, &value, state);
+ ret = parser_push(parser, token, &value, state);
} while (ret > 0);
if (ret < 0) {
std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename,
- lexer.get_location().first_line, lexer.get_location().first_column);
+ lexer->get_location().first_line, lexer->get_location().first_column);
return false;
}
+ parser_free(parser);
+
return true;
}
@@ -73,7 +76,7 @@ int main() {
using namespace solar;
state_t state;
- if (!readGrammar("grammar.y", &state))
+ if (!read_grammar("grammar.y", &state))
return 1;
return 0;
diff --git a/src/state.cpp b/src/state.cpp
new file mode 100644
index 0000000..e798dce
--- /dev/null
+++ b/src/state.cpp
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "state.hpp"
+
+
+namespace solar {
+
+void state_t::new_rule(const char *nonterm) {
+ if (rules.empty()) {
+ // start rule
+ current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm));
+ add_rule();
+ }
+
+ current = item_t(nonterm);
+
+}
+
+void state_t::add_rule_nonterminal(const char *nonterm) {
+ current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm));
+}
+
+void state_t::add_rule_terminal(const char *term) {
+ current.get_rhs().emplace_back(symbol_t::make_term(term));
+}
+
+void state_t::add_rule_terminal(unsigned char term) {
+ current.get_rhs().emplace_back(symbol_t::make_char(term));
+}
+
+void state_t::add_rule() {
+ rules.emplace(current.get_lhs(), current);
+
+ while (true) {
+ items.emplace(current.get_next_symbol(), current);
+ if (!current.can_shift())
+ break;
+
+ current.shift();
+ }
+}
+
+}
diff --git a/state.cpp b/src/state.hpp
index 2b91740..8069ef0 100644
--- a/state.cpp
+++ b/src/state.hpp
@@ -24,31 +24,35 @@
*/
-#include "state.hpp"
+#pragma once
+
+#include "item.hpp"
+
+#include <map>
+#include <utility>
namespace solar {
-void state_t::openRule(const std::string &nonterm) {
- current_nonterm = nonterm;
- current_rule = rhs_t();
-}
+class state_t {
+private:
+ std::multimap<std::string, item_t> rules;
+ std::multimap<symbol_t, item_t> items;
-void state_t::addRuleNonterminal(const std::string &term) {
- current_rule.emplace_back(nonterm_sym_t(term));
-}
+ item_t current;
-void state_t::addRuleTerminal(const std::string &term) {
- terminals.emplace(term);
- current_rule.emplace_back(term_sym_t(term));
-}
+public:
+ state_t() : current("") {}
-void state_t::addRuleTerminal(unsigned char term) {
- current_rule.emplace_back(char_sym_t(term));
-}
+ const std::multimap<std::string, item_t> & get_rules() const {
+ return rules;
+ }
-void state_t::closeRule() {
- rules.emplace(std::move(current_nonterm), std::move(current_rule));
-}
+ void new_rule(const char *nonterm);
+ void add_rule_nonterminal(const char *nonterm);
+ void add_rule_terminal(const char *term);
+ void add_rule_terminal(unsigned char term);
+ void add_rule();
+};
}
diff --git a/src/symbol.hpp b/src/symbol.hpp
new file mode 100644
index 0000000..31b006d
--- /dev/null
+++ b/src/symbol.hpp
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#pragma once
+
+#include <string>
+#include <tuple>
+
+
+namespace solar {
+
+enum symbol_type_t {
+ SYMBOL_TYPE_END,
+ SYMBOL_TYPE_NONTERM,
+ SYMBOL_TYPE_TERM,
+ SYMBOL_TYPE_CHAR,
+};
+
+struct symbol_t : public std::tuple<symbol_type_t, std::string> {
+ symbol_t(symbol_type_t type, const char *value) : std::tuple<symbol_type_t, std::string>(type, value) {}
+
+ symbol_type_t get_type() const {
+ return std::get<0>(*this);
+ }
+
+ const std::string & get_value() const {
+ return std::get<1>(*this);
+ }
+
+ static symbol_t make_end() {
+ return symbol_t(SYMBOL_TYPE_END, "");
+ }
+
+ static symbol_t make_nonterm(const char *value) {
+ return symbol_t(SYMBOL_TYPE_NONTERM, value);
+ }
+
+ static symbol_t make_term(const char *value) {
+ return symbol_t(SYMBOL_TYPE_TERM, value);
+ }
+
+ static symbol_t make_char(unsigned char value) {
+ char v[2] = {char(value), 0};
+ return symbol_t(SYMBOL_TYPE_CHAR, v);
+ }
+};
+
+}