Generate items from grammar

This commit is contained in:
Matthias Schiffer 2015-03-27 03:38:01 +01:00
parent 09972613c4
commit 8de90acc67
11 changed files with 251 additions and 92 deletions

View file

@ -1,11 +1,4 @@
cmake_minimum_required(VERSION 2.8.3)
project(SOLAR CXX)
add_executable(solar
lex.cpp
parser.cpp
solar.cpp
state.cpp
)
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")
add_subdirectory(src)

7
src/CMakeLists.txt Normal file
View file

@ -0,0 +1,7 @@
add_executable(solar
lex.cpp
parser.cpp
solar.cpp
state.cpp
)
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")

View file

@ -26,54 +26,55 @@
#pragma once
#include <string>
#include <unordered_set>
#include <unordered_map>
#include <utility>
#include "symbol.hpp"
#include <vector>
namespace solar {
class state_t {
private:
struct sym_t {
virtual ~sym_t() {}
};
struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
item_t(const std::string &lhs)
: std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {}
struct nonterm_sym_t : public sym_t {
std::string value;
const std::string & get_lhs() const {
return std::get<0>(*this);
}
nonterm_sym_t(const std::string &value0) : value(value0) {}
};
std::string & get_lhs() {
return std::get<0>(*this);
}
struct term_sym_t : public sym_t {
std::string value;
const std::vector<symbol_t> & get_rhs() const {
return std::get<1>(*this);
}
term_sym_t(const std::string &value0) : value(value0) {}
};
std::vector<symbol_t> & get_rhs() {
return std::get<1>(*this);
}
struct char_sym_t : public sym_t {
unsigned char value;
unsigned get_point() const {
return std::get<2>(*this);
}
char_sym_t(unsigned char value0) : value(value0) {}
};
unsigned & get_point() {
return std::get<2>(*this);
}
bool can_shift() const {
return get_point() < get_rhs().size();
}
typedef std::vector<sym_t> rhs_t;
void shift() {
get_point()++;
}
std::unordered_set<std::string> terminals;
std::unordered_multimap<std::string, rhs_t> rules;
std::string current_nonterm;
rhs_t current_rule;
public:
void openRule(const std::string &nonterm);
void addRuleTerminal(const std::string &term);
void addRuleTerminal(unsigned char term);
void addRuleNonterminal(const std::string &nonterm);
void closeRule();
symbol_t get_next_symbol() const {
if (can_shift())
return get_rhs()[get_point()];
else
return symbol_t::make_end();
}
};
}

View file

@ -130,6 +130,7 @@ int lex_t::consume_comment(value_t *value) {
return -1;
}
/*
int lex_t::unterminated_string(value_t *value) {
if (ferror(file))
return io_error(value);
@ -186,7 +187,7 @@ int lex_t::lex_string(value_t *value) {
consume(true);
return TOK_STRING;
}
}*/
int lex_t::lex_number(value_t *value) {
if (needspace)
@ -230,7 +231,7 @@ int lex_t::lex_keyword(value_t *value) {
}
char *token = get_token();
const keyword_t key = { .keyword = token };
const keyword_t key = { .keyword = token, .token = 0 };
const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
free(token);
@ -274,10 +275,6 @@ int lex_t::lex_symbol(value_t *value, bool terminal) {
return terminal ? TOK_TERM : TOK_NONTERM;
}
lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
advance();
}
int lex_t::lex(value_t *value) {
int token;
@ -320,8 +317,8 @@ int lex_t::lex(value_t *value) {
if (current() != '/')
return syntax_error(value);
/* fall-through */
case '#':
///* fall-through */
//case '#':
while (next(true)) {
if (current() == '\n')
break;
@ -346,8 +343,8 @@ int lex_t::lex(value_t *value) {
return TOK_CHAR;
case '"':
return lex_string(value);
//case '"':
//return lex_string(value);
case '0' ... '9':
return lex_number(value);

View file

@ -55,7 +55,7 @@ private:
size_t start;
size_t end;
size_t tok_len;
char buffer[1024];
char buffer[65556];
bool advance();
@ -65,7 +65,7 @@ private:
int io_error(value_t *value);
int syntax_error(value_t *value);
int consume_comment(value_t *value);
int unterminated_string(value_t *value);
//int unterminated_string(value_t *value);
int lex_string(value_t *value);
int lex_address(value_t *value);
@ -84,7 +84,13 @@ private:
public:
lex_t(FILE *file);
lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
advance();
}
~lex_t() {
fclose(file);
}
int lex(value_t *value);

View file

@ -26,7 +26,6 @@
#include "parser.hpp"
#include <cstdio>
#include <cstdlib>
@ -50,13 +49,14 @@ parser_t * parser_alloc(void) {
return parser;
}
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) {
int parser_push(parser_t *parser, int token, const value_t *value, state_t *state) {
switch (parser->state) {
case STATE_INIT:
switch (token) {
case TOK_NONTERM:
parser->state = STATE_RULE_BAR;
state->openRule(value->str);
state->new_rule(value->str);
free(value->str);
return 1;
case 0:
@ -66,9 +66,10 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
break;
case STATE_RULE_BAR:
if (token == '|') {
parser->state = STATE_RULE_EQUAL;
if (token == '|')
return 1;
}
break;
@ -83,20 +84,22 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
case STATE_RULE:
switch (token) {
case TOK_NONTERM:
state->addRuleNonterminal(value->str);
state->add_rule_nonterminal(value->str);
free(value->str);
return 1;
case TOK_TERM:
state->addRuleTerminal(value->str);
state->add_rule_terminal(value->str);
free(value->str);
return 1;
case TOK_CHAR:
state->addRuleTerminal(value->number);
state->add_rule_terminal(value->number);
return 1;
case TOK_BLOCK:
case ';':
state->closeRule();
state->add_rule();
parser->state = STATE_INIT;
return 1;
}
@ -104,6 +107,14 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
break;
}
switch (token) {
case TOK_NONTERM:
case TOK_TERM:
case TOK_CHAR:
case TOK_BLOCK:
free(value->str);;
}
return -1;
}

View file

@ -38,7 +38,6 @@ enum token_t {
TOK_NONTERM,
TOK_BLOCK,
TOK_CHAR,
TOK_STRING,
TOK_UINT,
};
@ -53,7 +52,7 @@ typedef struct parser parser_t;
parser_t * parser_alloc(void);
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state);
int parser_push(parser_t *parser, int token, const value_t *value, state_t *state);
void parser_free(parser_t *parser);
}

View file

@ -24,22 +24,23 @@
*/
#include <cstdio>
#include "lex.hpp"
#include "parser.hpp"
#include <cstdio>
#include <memory>
namespace solar {
bool readGrammar(const char *filename, state_t *state) {
bool read_grammar(const char *filename, state_t *state) {
FILE *file = fopen(filename, "r");
if (!file) {
std::fprintf(stderr, "unable to open file %s\n", filename);
return false;
}
lex_t lexer(file);
std::unique_ptr<lex_t> lexer(new lex_t(file));
parser_t *parser = parser_alloc();
int ret;
@ -47,22 +48,24 @@ bool readGrammar(const char *filename, state_t *state) {
int token;
value_t value;
token = lexer.lex(&value);
token = lexer->lex(&value);
if (token < 0) {
std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename,
lexer.get_location().first_line, lexer.get_location().first_column);
lexer->get_location().first_line, lexer->get_location().first_column);
return false;
}
ret = parser_parse(parser, token, &value, state);
ret = parser_push(parser, token, &value, state);
} while (ret > 0);
if (ret < 0) {
std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename,
lexer.get_location().first_line, lexer.get_location().first_column);
lexer->get_location().first_line, lexer->get_location().first_column);
return false;
}
parser_free(parser);
return true;
}
@ -73,7 +76,7 @@ int main() {
using namespace solar;
state_t state;
if (!readGrammar("grammar.y", &state))
if (!read_grammar("grammar.y", &state))
return 1;
return 0;

67
src/state.cpp Normal file
View file

@ -0,0 +1,67 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "state.hpp"
namespace solar {
void state_t::new_rule(const char *nonterm) {
if (rules.empty()) {
// start rule
current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm));
add_rule();
}
current = item_t(nonterm);
}
void state_t::add_rule_nonterminal(const char *nonterm) {
current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm));
}
void state_t::add_rule_terminal(const char *term) {
current.get_rhs().emplace_back(symbol_t::make_term(term));
}
void state_t::add_rule_terminal(unsigned char term) {
current.get_rhs().emplace_back(symbol_t::make_char(term));
}
void state_t::add_rule() {
rules.emplace(current.get_lhs(), current);
while (true) {
items.emplace(current.get_next_symbol(), current);
if (!current.can_shift())
break;
current.shift();
}
}
}

View file

@ -24,31 +24,35 @@
*/
#include "state.hpp"
#pragma once
#include "item.hpp"
#include <map>
#include <utility>
namespace solar {
void state_t::openRule(const std::string &nonterm) {
current_nonterm = nonterm;
current_rule = rhs_t();
}
class state_t {
private:
std::multimap<std::string, item_t> rules;
std::multimap<symbol_t, item_t> items;
void state_t::addRuleNonterminal(const std::string &term) {
current_rule.emplace_back(nonterm_sym_t(term));
}
item_t current;
void state_t::addRuleTerminal(const std::string &term) {
terminals.emplace(term);
current_rule.emplace_back(term_sym_t(term));
}
public:
state_t() : current("") {}
void state_t::addRuleTerminal(unsigned char term) {
current_rule.emplace_back(char_sym_t(term));
}
const std::multimap<std::string, item_t> & get_rules() const {
return rules;
}
void state_t::closeRule() {
rules.emplace(std::move(current_nonterm), std::move(current_rule));
}
void new_rule(const char *nonterm);
void add_rule_nonterminal(const char *nonterm);
void add_rule_terminal(const char *term);
void add_rule_terminal(unsigned char term);
void add_rule();
};
}

71
src/symbol.hpp Normal file
View file

@ -0,0 +1,71 @@
/*
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <tuple>
namespace solar {
enum symbol_type_t {
SYMBOL_TYPE_END,
SYMBOL_TYPE_NONTERM,
SYMBOL_TYPE_TERM,
SYMBOL_TYPE_CHAR,
};
struct symbol_t : public std::tuple<symbol_type_t, std::string> {
symbol_t(symbol_type_t type, const char *value) : std::tuple<symbol_type_t, std::string>(type, value) {}
symbol_type_t get_type() const {
return std::get<0>(*this);
}
const std::string & get_value() const {
return std::get<1>(*this);
}
static symbol_t make_end() {
return symbol_t(SYMBOL_TYPE_END, "");
}
static symbol_t make_nonterm(const char *value) {
return symbol_t(SYMBOL_TYPE_NONTERM, value);
}
static symbol_t make_term(const char *value) {
return symbol_t(SYMBOL_TYPE_TERM, value);
}
static symbol_t make_char(unsigned char value) {
char v[2] = {char(value), 0};
return symbol_t(SYMBOL_TYPE_CHAR, v);
}
};
}