Generate items from grammar

This commit is contained in:
Matthias Schiffer 2015-03-27 03:38:01 +01:00
parent 09972613c4
commit 8de90acc67
11 changed files with 251 additions and 92 deletions

View file

@ -1,11 +1,4 @@
cmake_minimum_required(VERSION 2.8.3) cmake_minimum_required(VERSION 2.8.3)
project(SOLAR CXX) project(SOLAR CXX)
add_subdirectory(src)
add_executable(solar
lex.cpp
parser.cpp
solar.cpp
state.cpp
)
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")

7
src/CMakeLists.txt Normal file
View file

@ -0,0 +1,7 @@
# Define the "solar" executable target, built from the sources listed below
# (paths are relative to this CMakeLists.txt).
add_executable(solar
lex.cpp
parser.cpp
solar.cpp
state.cpp
)
# Pass "-std=c++11 -Wall" to the compiler for every source of the solar target.
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")

View file

@ -26,54 +26,55 @@
#pragma once #pragma once
#include <string> #include "symbol.hpp"
#include <unordered_set>
#include <unordered_map>
#include <utility>
#include <vector> #include <vector>
namespace solar { namespace solar {
class state_t { struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
private: item_t(const std::string &lhs)
struct sym_t { : std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {}
virtual ~sym_t() {}
};
struct nonterm_sym_t : public sym_t { const std::string & get_lhs() const {
std::string value; return std::get<0>(*this);
}
nonterm_sym_t(const std::string &value0) : value(value0) {} std::string & get_lhs() {
}; return std::get<0>(*this);
}
struct term_sym_t : public sym_t { const std::vector<symbol_t> & get_rhs() const {
std::string value; return std::get<1>(*this);
}
term_sym_t(const std::string &value0) : value(value0) {} std::vector<symbol_t> & get_rhs() {
}; return std::get<1>(*this);
}
struct char_sym_t : public sym_t { unsigned get_point() const {
unsigned char value; return std::get<2>(*this);
}
char_sym_t(unsigned char value0) : value(value0) {} unsigned & get_point() {
}; return std::get<2>(*this);
}
bool can_shift() const {
return get_point() < get_rhs().size();
}
typedef std::vector<sym_t> rhs_t; void shift() {
get_point()++;
}
std::unordered_set<std::string> terminals; symbol_t get_next_symbol() const {
std::unordered_multimap<std::string, rhs_t> rules; if (can_shift())
return get_rhs()[get_point()];
std::string current_nonterm; else
rhs_t current_rule; return symbol_t::make_end();
}
public:
void openRule(const std::string &nonterm);
void addRuleTerminal(const std::string &term);
void addRuleTerminal(unsigned char term);
void addRuleNonterminal(const std::string &nonterm);
void closeRule();
}; };
} }

View file

@ -130,6 +130,7 @@ int lex_t::consume_comment(value_t *value) {
return -1; return -1;
} }
/*
int lex_t::unterminated_string(value_t *value) { int lex_t::unterminated_string(value_t *value) {
if (ferror(file)) if (ferror(file))
return io_error(value); return io_error(value);
@ -186,7 +187,7 @@ int lex_t::lex_string(value_t *value) {
consume(true); consume(true);
return TOK_STRING; return TOK_STRING;
} }*/
int lex_t::lex_number(value_t *value) { int lex_t::lex_number(value_t *value) {
if (needspace) if (needspace)
@ -230,7 +231,7 @@ int lex_t::lex_keyword(value_t *value) {
} }
char *token = get_token(); char *token = get_token();
const keyword_t key = { .keyword = token }; const keyword_t key = { .keyword = token, .token = 0 };
const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords)); const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
free(token); free(token);
@ -274,10 +275,6 @@ int lex_t::lex_symbol(value_t *value, bool terminal) {
return terminal ? TOK_TERM : TOK_NONTERM; return terminal ? TOK_TERM : TOK_NONTERM;
} }
lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
advance();
}
int lex_t::lex(value_t *value) { int lex_t::lex(value_t *value) {
int token; int token;
@ -320,8 +317,8 @@ int lex_t::lex(value_t *value) {
if (current() != '/') if (current() != '/')
return syntax_error(value); return syntax_error(value);
/* fall-through */ ///* fall-through */
case '#': //case '#':
while (next(true)) { while (next(true)) {
if (current() == '\n') if (current() == '\n')
break; break;
@ -346,8 +343,8 @@ int lex_t::lex(value_t *value) {
return TOK_CHAR; return TOK_CHAR;
case '"': //case '"':
return lex_string(value); //return lex_string(value);
case '0' ... '9': case '0' ... '9':
return lex_number(value); return lex_number(value);

View file

@ -55,7 +55,7 @@ private:
size_t start; size_t start;
size_t end; size_t end;
size_t tok_len; size_t tok_len;
char buffer[1024]; char buffer[65556];
bool advance(); bool advance();
@ -65,7 +65,7 @@ private:
int io_error(value_t *value); int io_error(value_t *value);
int syntax_error(value_t *value); int syntax_error(value_t *value);
int consume_comment(value_t *value); int consume_comment(value_t *value);
int unterminated_string(value_t *value); //int unterminated_string(value_t *value);
int lex_string(value_t *value); int lex_string(value_t *value);
int lex_address(value_t *value); int lex_address(value_t *value);
@ -84,7 +84,13 @@ private:
public: public:
lex_t(FILE *file); lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
advance();
}
~lex_t() {
fclose(file);
}
int lex(value_t *value); int lex(value_t *value);

View file

@ -26,7 +26,6 @@
#include "parser.hpp" #include "parser.hpp"
#include <cstdio>
#include <cstdlib> #include <cstdlib>
@ -50,13 +49,14 @@ parser_t * parser_alloc(void) {
return parser; return parser;
} }
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) { int parser_push(parser_t *parser, int token, const value_t *value, state_t *state) {
switch (parser->state) { switch (parser->state) {
case STATE_INIT: case STATE_INIT:
switch (token) { switch (token) {
case TOK_NONTERM: case TOK_NONTERM:
parser->state = STATE_RULE_BAR; parser->state = STATE_RULE_BAR;
state->openRule(value->str); state->new_rule(value->str);
free(value->str);
return 1; return 1;
case 0: case 0:
@ -66,9 +66,10 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
break; break;
case STATE_RULE_BAR: case STATE_RULE_BAR:
if (token == '|') {
parser->state = STATE_RULE_EQUAL; parser->state = STATE_RULE_EQUAL;
if (token == '|')
return 1; return 1;
}
break; break;
@ -83,20 +84,22 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
case STATE_RULE: case STATE_RULE:
switch (token) { switch (token) {
case TOK_NONTERM: case TOK_NONTERM:
state->addRuleNonterminal(value->str); state->add_rule_nonterminal(value->str);
free(value->str);
return 1; return 1;
case TOK_TERM: case TOK_TERM:
state->addRuleTerminal(value->str); state->add_rule_terminal(value->str);
free(value->str);
return 1; return 1;
case TOK_CHAR: case TOK_CHAR:
state->addRuleTerminal(value->number); state->add_rule_terminal(value->number);
return 1; return 1;
case TOK_BLOCK: case TOK_BLOCK:
case ';': case ';':
state->closeRule(); state->add_rule();
parser->state = STATE_INIT; parser->state = STATE_INIT;
return 1; return 1;
} }
@ -104,6 +107,14 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
break; break;
} }
switch (token) {
case TOK_NONTERM:
case TOK_TERM:
case TOK_CHAR:
case TOK_BLOCK:
free(value->str);;
}
return -1; return -1;
} }

View file

@ -38,7 +38,6 @@ enum token_t {
TOK_NONTERM, TOK_NONTERM,
TOK_BLOCK, TOK_BLOCK,
TOK_CHAR, TOK_CHAR,
TOK_STRING,
TOK_UINT, TOK_UINT,
}; };
@ -53,7 +52,7 @@ typedef struct parser parser_t;
parser_t * parser_alloc(void); parser_t * parser_alloc(void);
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state); int parser_push(parser_t *parser, int token, const value_t *value, state_t *state);
void parser_free(parser_t *parser); void parser_free(parser_t *parser);
} }

View file

@ -24,22 +24,23 @@
*/ */
#include <cstdio>
#include "lex.hpp" #include "lex.hpp"
#include "parser.hpp" #include "parser.hpp"
#include <cstdio>
#include <memory>
namespace solar { namespace solar {
bool readGrammar(const char *filename, state_t *state) { bool read_grammar(const char *filename, state_t *state) {
FILE *file = fopen(filename, "r"); FILE *file = fopen(filename, "r");
if (!file) { if (!file) {
std::fprintf(stderr, "unable to open file %s\n", filename); std::fprintf(stderr, "unable to open file %s\n", filename);
return false; return false;
} }
lex_t lexer(file); std::unique_ptr<lex_t> lexer(new lex_t(file));
parser_t *parser = parser_alloc(); parser_t *parser = parser_alloc();
int ret; int ret;
@ -47,22 +48,24 @@ bool readGrammar(const char *filename, state_t *state) {
int token; int token;
value_t value; value_t value;
token = lexer.lex(&value); token = lexer->lex(&value);
if (token < 0) { if (token < 0) {
std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename, std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename,
lexer.get_location().first_line, lexer.get_location().first_column); lexer->get_location().first_line, lexer->get_location().first_column);
return false; return false;
} }
ret = parser_parse(parser, token, &value, state); ret = parser_push(parser, token, &value, state);
} while (ret > 0); } while (ret > 0);
if (ret < 0) { if (ret < 0) {
std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename, std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename,
lexer.get_location().first_line, lexer.get_location().first_column); lexer->get_location().first_line, lexer->get_location().first_column);
return false; return false;
} }
parser_free(parser);
return true; return true;
} }
@ -73,7 +76,7 @@ int main() {
using namespace solar; using namespace solar;
state_t state; state_t state;
if (!readGrammar("grammar.y", &state)) if (!read_grammar("grammar.y", &state))
return 1; return 1;
return 0; return 0;

67
src/state.cpp Normal file
View file

@ -0,0 +1,67 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "state.hpp"
namespace solar {
/**
 * Starts collecting a new rule whose left-hand side is \p nonterm.
 *
 * When no rule has been stored yet, the pending item is first extended with
 * \p nonterm and committed through add_rule(), which makes it the start rule.
 * Afterwards the pending item is replaced by a fresh one for \p nonterm.
 */
void state_t::new_rule(const char *nonterm) {
	const bool first_rule = rules.empty();

	if (first_rule) {
		// Nothing stored so far: synthesize the start rule pointing at this nonterminal
		auto &start_rhs = current.get_rhs();
		start_rhs.push_back(symbol_t::make_nonterm(nonterm));
		add_rule();
	}

	current = item_t(nonterm);
}
/** Appends the nonterminal named \p nonterm to the right-hand side of the pending item. */
void state_t::add_rule_nonterminal(const char *nonterm) {
	auto &rhs = current.get_rhs();
	rhs.push_back(symbol_t::make_nonterm(nonterm));
}
/** Appends the named terminal \p term to the right-hand side of the pending item. */
void state_t::add_rule_terminal(const char *term) {
	auto &rhs = current.get_rhs();
	rhs.push_back(symbol_t::make_term(term));
}
/** Appends the single-character terminal \p term to the right-hand side of the pending item. */
void state_t::add_rule_terminal(unsigned char term) {
	auto &rhs = current.get_rhs();
	rhs.push_back(symbol_t::make_char(term));
}
/**
 * Commits the pending item.
 *
 * The item is stored in `rules` under its left-hand side. A copy of the item
 * is then stored in `items` for its current point and for every later point
 * position, each keyed by the symbol that follows the point (the end symbol
 * once the point is past the last right-hand side symbol).
 *
 * The pending item itself is advanced in the process and is left with its
 * point at the end of the right-hand side.
 */
void state_t::add_rule() {
	rules.emplace(current.get_lhs(), current);

	// Record the item at its current point first ...
	items.emplace(current.get_next_symbol(), current);

	// ... then once more after every possible shift
	while (current.can_shift()) {
		current.shift();
		items.emplace(current.get_next_symbol(), current);
	}
}
}

View file

@ -24,31 +24,35 @@
*/ */
#include "state.hpp" #pragma once
#include "item.hpp"
#include <map>
#include <utility>
namespace solar { namespace solar {
void state_t::openRule(const std::string &nonterm) { class state_t {
current_nonterm = nonterm; private:
current_rule = rhs_t(); std::multimap<std::string, item_t> rules;
std::multimap<symbol_t, item_t> items;
item_t current;
public:
state_t() : current("") {}
const std::multimap<std::string, item_t> & get_rules() const {
return rules;
} }
void state_t::addRuleNonterminal(const std::string &term) { void new_rule(const char *nonterm);
current_rule.emplace_back(nonterm_sym_t(term)); void add_rule_nonterminal(const char *nonterm);
} void add_rule_terminal(const char *term);
void add_rule_terminal(unsigned char term);
void state_t::addRuleTerminal(const std::string &term) { void add_rule();
terminals.emplace(term); };
current_rule.emplace_back(term_sym_t(term));
}
void state_t::addRuleTerminal(unsigned char term) {
current_rule.emplace_back(char_sym_t(term));
}
void state_t::closeRule() {
rules.emplace(std::move(current_nonterm), std::move(current_rule));
}
} }

71
src/symbol.hpp Normal file
View file

@ -0,0 +1,71 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <tuple>
namespace solar {

/**
 * Kind of a grammar symbol.
 *
 * symbol_t compares by type first, so the enumerator order below also
 * determines how symbols of different kinds sort relative to each other.
 */
enum symbol_type_t {
	SYMBOL_TYPE_END,      ///< symbol without a value, see symbol_t::make_end()
	SYMBOL_TYPE_NONTERM,  ///< named nonterminal
	SYMBOL_TYPE_TERM,     ///< named terminal
	SYMBOL_TYPE_CHAR,     ///< single-character terminal
};

/**
 * A grammar symbol: a (type, value) pair.
 *
 * Deriving from std::tuple gives the type the tuple's comparison operators
 * (type first, then value), which allows it to be used as a key in ordered
 * containers.
 */
struct symbol_t : public std::tuple<symbol_type_t, std::string> {
	/**
	 * Creates a symbol from a NUL-terminated string.
	 *
	 * \param type  kind of the symbol
	 * \param value NUL-terminated value; must not be a null pointer
	 */
	symbol_t(symbol_type_t type, const char *value)
		: std::tuple<symbol_type_t, std::string>(type, value) {}

	/**
	 * Creates a symbol from a std::string.
	 *
	 * Unlike the C string overload, this keeps embedded NUL bytes.
	 *
	 * \param type  kind of the symbol
	 * \param value value of the symbol, copied as is
	 */
	symbol_t(symbol_type_t type, const std::string &value)
		: std::tuple<symbol_type_t, std::string>(type, value) {}

	/** Returns the kind of this symbol. */
	symbol_type_t get_type() const {
		return std::get<0>(*this);
	}

	/** Returns the value (name or character) of this symbol. */
	const std::string & get_value() const {
		return std::get<1>(*this);
	}

	/** Creates a symbol of type SYMBOL_TYPE_END with an empty value. */
	static symbol_t make_end() {
		return symbol_t(SYMBOL_TYPE_END, "");
	}

	/** Creates a nonterminal symbol named \p value. */
	static symbol_t make_nonterm(const char *value) {
		return symbol_t(SYMBOL_TYPE_NONTERM, value);
	}

	/** Creates a terminal symbol named \p value. */
	static symbol_t make_term(const char *value) {
		return symbol_t(SYMBOL_TYPE_TERM, value);
	}

	/**
	 * Creates a single-character terminal symbol.
	 *
	 * The value always has length 1. It is built with an explicit length
	 * because passing it as a C string would end at the terminator and
	 * turn the character '\0' into an empty value.
	 */
	static symbol_t make_char(unsigned char value) {
		return symbol_t(SYMBOL_TYPE_CHAR, std::string(1, static_cast<char>(value)));
	}
};

}