Generate items from grammar
This commit is contained in:
parent
09972613c4
commit
8de90acc67
11 changed files with 251 additions and 92 deletions
|
@ -1,11 +1,4 @@
|
|||
cmake_minimum_required(VERSION 2.8.3)
|
||||
project(SOLAR CXX)
|
||||
|
||||
|
||||
add_executable(solar
|
||||
lex.cpp
|
||||
parser.cpp
|
||||
solar.cpp
|
||||
state.cpp
|
||||
)
|
||||
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")
|
||||
add_subdirectory(src)
|
||||
|
|
7
src/CMakeLists.txt
Normal file
7
src/CMakeLists.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
add_executable(solar
|
||||
lex.cpp
|
||||
parser.cpp
|
||||
solar.cpp
|
||||
state.cpp
|
||||
)
|
||||
set_target_properties(solar PROPERTIES COMPILE_FLAGS "-std=c++11 -Wall")
|
|
@ -26,54 +26,55 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "symbol.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace solar {
|
||||
|
||||
class state_t {
|
||||
private:
|
||||
struct sym_t {
|
||||
virtual ~sym_t() {}
|
||||
};
|
||||
struct item_t : public std::tuple<std::string, std::vector<symbol_t>, unsigned> {
|
||||
item_t(const std::string &lhs)
|
||||
: std::tuple<std::string, std::vector<symbol_t>, unsigned>(lhs, std::vector<symbol_t>(), 0) {}
|
||||
|
||||
struct nonterm_sym_t : public sym_t {
|
||||
std::string value;
|
||||
const std::string & get_lhs() const {
|
||||
return std::get<0>(*this);
|
||||
}
|
||||
|
||||
nonterm_sym_t(const std::string &value0) : value(value0) {}
|
||||
};
|
||||
std::string & get_lhs() {
|
||||
return std::get<0>(*this);
|
||||
}
|
||||
|
||||
struct term_sym_t : public sym_t {
|
||||
std::string value;
|
||||
const std::vector<symbol_t> & get_rhs() const {
|
||||
return std::get<1>(*this);
|
||||
}
|
||||
|
||||
term_sym_t(const std::string &value0) : value(value0) {}
|
||||
};
|
||||
std::vector<symbol_t> & get_rhs() {
|
||||
return std::get<1>(*this);
|
||||
}
|
||||
|
||||
struct char_sym_t : public sym_t {
|
||||
unsigned char value;
|
||||
unsigned get_point() const {
|
||||
return std::get<2>(*this);
|
||||
}
|
||||
|
||||
char_sym_t(unsigned char value0) : value(value0) {}
|
||||
};
|
||||
unsigned & get_point() {
|
||||
return std::get<2>(*this);
|
||||
}
|
||||
|
||||
bool can_shift() const {
|
||||
return get_point() < get_rhs().size();
|
||||
}
|
||||
|
||||
typedef std::vector<sym_t> rhs_t;
|
||||
void shift() {
|
||||
get_point()++;
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> terminals;
|
||||
std::unordered_multimap<std::string, rhs_t> rules;
|
||||
|
||||
std::string current_nonterm;
|
||||
rhs_t current_rule;
|
||||
|
||||
public:
|
||||
void openRule(const std::string &nonterm);
|
||||
void addRuleTerminal(const std::string &term);
|
||||
void addRuleTerminal(unsigned char term);
|
||||
void addRuleNonterminal(const std::string &nonterm);
|
||||
void closeRule();
|
||||
symbol_t get_next_symbol() const {
|
||||
if (can_shift())
|
||||
return get_rhs()[get_point()];
|
||||
else
|
||||
return symbol_t::make_end();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
|
@ -130,6 +130,7 @@ int lex_t::consume_comment(value_t *value) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
int lex_t::unterminated_string(value_t *value) {
|
||||
if (ferror(file))
|
||||
return io_error(value);
|
||||
|
@ -186,7 +187,7 @@ int lex_t::lex_string(value_t *value) {
|
|||
consume(true);
|
||||
|
||||
return TOK_STRING;
|
||||
}
|
||||
}*/
|
||||
|
||||
int lex_t::lex_number(value_t *value) {
|
||||
if (needspace)
|
||||
|
@ -230,7 +231,7 @@ int lex_t::lex_keyword(value_t *value) {
|
|||
}
|
||||
|
||||
char *token = get_token();
|
||||
const keyword_t key = { .keyword = token };
|
||||
const keyword_t key = { .keyword = token, .token = 0 };
|
||||
const keyword_t *ret = static_cast<const keyword_t*>(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords));
|
||||
free(token);
|
||||
|
||||
|
@ -274,10 +275,6 @@ int lex_t::lex_symbol(value_t *value, bool terminal) {
|
|||
return terminal ? TOK_TERM : TOK_NONTERM;
|
||||
}
|
||||
|
||||
lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
|
||||
advance();
|
||||
}
|
||||
|
||||
int lex_t::lex(value_t *value) {
|
||||
int token;
|
||||
|
||||
|
@ -320,8 +317,8 @@ int lex_t::lex(value_t *value) {
|
|||
if (current() != '/')
|
||||
return syntax_error(value);
|
||||
|
||||
/* fall-through */
|
||||
case '#':
|
||||
///* fall-through */
|
||||
//case '#':
|
||||
while (next(true)) {
|
||||
if (current() == '\n')
|
||||
break;
|
||||
|
@ -346,8 +343,8 @@ int lex_t::lex(value_t *value) {
|
|||
|
||||
return TOK_CHAR;
|
||||
|
||||
case '"':
|
||||
return lex_string(value);
|
||||
//case '"':
|
||||
//return lex_string(value);
|
||||
|
||||
case '0' ... '9':
|
||||
return lex_number(value);
|
|
@ -55,7 +55,7 @@ private:
|
|||
size_t start;
|
||||
size_t end;
|
||||
size_t tok_len;
|
||||
char buffer[1024];
|
||||
char buffer[65556];
|
||||
|
||||
|
||||
bool advance();
|
||||
|
@ -65,7 +65,7 @@ private:
|
|||
int io_error(value_t *value);
|
||||
int syntax_error(value_t *value);
|
||||
int consume_comment(value_t *value);
|
||||
int unterminated_string(value_t *value);
|
||||
//int unterminated_string(value_t *value);
|
||||
|
||||
int lex_string(value_t *value);
|
||||
int lex_address(value_t *value);
|
||||
|
@ -84,7 +84,13 @@ private:
|
|||
|
||||
|
||||
public:
|
||||
lex_t(FILE *file);
|
||||
lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) {
|
||||
advance();
|
||||
}
|
||||
|
||||
~lex_t() {
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
int lex(value_t *value);
|
||||
|
|
@ -26,7 +26,6 @@
|
|||
|
||||
#include "parser.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
|
||||
|
@ -50,13 +49,14 @@ parser_t * parser_alloc(void) {
|
|||
return parser;
|
||||
}
|
||||
|
||||
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state) {
|
||||
int parser_push(parser_t *parser, int token, const value_t *value, state_t *state) {
|
||||
switch (parser->state) {
|
||||
case STATE_INIT:
|
||||
switch (token) {
|
||||
case TOK_NONTERM:
|
||||
parser->state = STATE_RULE_BAR;
|
||||
state->openRule(value->str);
|
||||
state->new_rule(value->str);
|
||||
free(value->str);
|
||||
return 1;
|
||||
|
||||
case 0:
|
||||
|
@ -66,9 +66,10 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
|
|||
break;
|
||||
|
||||
case STATE_RULE_BAR:
|
||||
if (token == '|') {
|
||||
parser->state = STATE_RULE_EQUAL;
|
||||
if (token == '|')
|
||||
return 1;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
|
@ -83,20 +84,22 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
|
|||
case STATE_RULE:
|
||||
switch (token) {
|
||||
case TOK_NONTERM:
|
||||
state->addRuleNonterminal(value->str);
|
||||
state->add_rule_nonterminal(value->str);
|
||||
free(value->str);
|
||||
return 1;
|
||||
|
||||
case TOK_TERM:
|
||||
state->addRuleTerminal(value->str);
|
||||
state->add_rule_terminal(value->str);
|
||||
free(value->str);
|
||||
return 1;
|
||||
|
||||
case TOK_CHAR:
|
||||
state->addRuleTerminal(value->number);
|
||||
state->add_rule_terminal(value->number);
|
||||
return 1;
|
||||
|
||||
case TOK_BLOCK:
|
||||
case ';':
|
||||
state->closeRule();
|
||||
state->add_rule();
|
||||
parser->state = STATE_INIT;
|
||||
return 1;
|
||||
}
|
||||
|
@ -104,6 +107,14 @@ int parser_parse(parser_t *parser, int token, const value_t *value, state_t *sta
|
|||
break;
|
||||
}
|
||||
|
||||
switch (token) {
|
||||
case TOK_NONTERM:
|
||||
case TOK_TERM:
|
||||
case TOK_CHAR:
|
||||
case TOK_BLOCK:
|
||||
free(value->str);;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -38,7 +38,6 @@ enum token_t {
|
|||
TOK_NONTERM,
|
||||
TOK_BLOCK,
|
||||
TOK_CHAR,
|
||||
TOK_STRING,
|
||||
TOK_UINT,
|
||||
};
|
||||
|
||||
|
@ -53,7 +52,7 @@ typedef struct parser parser_t;
|
|||
|
||||
|
||||
parser_t * parser_alloc(void);
|
||||
int parser_parse(parser_t *parser, int token, const value_t *value, state_t *state);
|
||||
int parser_push(parser_t *parser, int token, const value_t *value, state_t *state);
|
||||
void parser_free(parser_t *parser);
|
||||
|
||||
}
|
|
@ -24,22 +24,23 @@
|
|||
*/
|
||||
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "lex.hpp"
|
||||
#include "parser.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace solar {
|
||||
|
||||
bool readGrammar(const char *filename, state_t *state) {
|
||||
bool read_grammar(const char *filename, state_t *state) {
|
||||
FILE *file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
std::fprintf(stderr, "unable to open file %s\n", filename);
|
||||
return false;
|
||||
}
|
||||
|
||||
lex_t lexer(file);
|
||||
std::unique_ptr<lex_t> lexer(new lex_t(file));
|
||||
parser_t *parser = parser_alloc();
|
||||
int ret;
|
||||
|
||||
|
@ -47,22 +48,24 @@ bool readGrammar(const char *filename, state_t *state) {
|
|||
int token;
|
||||
value_t value;
|
||||
|
||||
token = lexer.lex(&value);
|
||||
token = lexer->lex(&value);
|
||||
if (token < 0) {
|
||||
std::fprintf(stderr, "error: %s at %s:%i:%i\n", value.error, filename,
|
||||
lexer.get_location().first_line, lexer.get_location().first_column);
|
||||
lexer->get_location().first_line, lexer->get_location().first_column);
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = parser_parse(parser, token, &value, state);
|
||||
ret = parser_push(parser, token, &value, state);
|
||||
} while (ret > 0);
|
||||
|
||||
if (ret < 0) {
|
||||
std::fprintf(stderr, "error: parse error at %s:%i:%i\n", filename,
|
||||
lexer.get_location().first_line, lexer.get_location().first_column);
|
||||
lexer->get_location().first_line, lexer->get_location().first_column);
|
||||
return false;
|
||||
}
|
||||
|
||||
parser_free(parser);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -73,7 +76,7 @@ int main() {
|
|||
using namespace solar;
|
||||
|
||||
state_t state;
|
||||
if (!readGrammar("grammar.y", &state))
|
||||
if (!read_grammar("grammar.y", &state))
|
||||
return 1;
|
||||
|
||||
return 0;
|
67
src/state.cpp
Normal file
67
src/state.cpp
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include "state.hpp"
|
||||
|
||||
|
||||
namespace solar {
|
||||
|
||||
void state_t::new_rule(const char *nonterm) {
|
||||
if (rules.empty()) {
|
||||
// start rule
|
||||
current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm));
|
||||
add_rule();
|
||||
}
|
||||
|
||||
current = item_t(nonterm);
|
||||
|
||||
}
|
||||
|
||||
void state_t::add_rule_nonterminal(const char *nonterm) {
|
||||
current.get_rhs().emplace_back(symbol_t::make_nonterm(nonterm));
|
||||
}
|
||||
|
||||
void state_t::add_rule_terminal(const char *term) {
|
||||
current.get_rhs().emplace_back(symbol_t::make_term(term));
|
||||
}
|
||||
|
||||
void state_t::add_rule_terminal(unsigned char term) {
|
||||
current.get_rhs().emplace_back(symbol_t::make_char(term));
|
||||
}
|
||||
|
||||
void state_t::add_rule() {
|
||||
rules.emplace(current.get_lhs(), current);
|
||||
|
||||
while (true) {
|
||||
items.emplace(current.get_next_symbol(), current);
|
||||
if (!current.can_shift())
|
||||
break;
|
||||
|
||||
current.shift();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -24,31 +24,35 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "state.hpp"
|
||||
#pragma once
|
||||
|
||||
#include "item.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
||||
|
||||
namespace solar {
|
||||
|
||||
void state_t::openRule(const std::string &nonterm) {
|
||||
current_nonterm = nonterm;
|
||||
current_rule = rhs_t();
|
||||
class state_t {
|
||||
private:
|
||||
std::multimap<std::string, item_t> rules;
|
||||
std::multimap<symbol_t, item_t> items;
|
||||
|
||||
item_t current;
|
||||
|
||||
public:
|
||||
state_t() : current("") {}
|
||||
|
||||
const std::multimap<std::string, item_t> & get_rules() const {
|
||||
return rules;
|
||||
}
|
||||
|
||||
void state_t::addRuleNonterminal(const std::string &term) {
|
||||
current_rule.emplace_back(nonterm_sym_t(term));
|
||||
}
|
||||
|
||||
void state_t::addRuleTerminal(const std::string &term) {
|
||||
terminals.emplace(term);
|
||||
current_rule.emplace_back(term_sym_t(term));
|
||||
}
|
||||
|
||||
void state_t::addRuleTerminal(unsigned char term) {
|
||||
current_rule.emplace_back(char_sym_t(term));
|
||||
}
|
||||
|
||||
void state_t::closeRule() {
|
||||
rules.emplace(std::move(current_nonterm), std::move(current_rule));
|
||||
}
|
||||
void new_rule(const char *nonterm);
|
||||
void add_rule_nonterminal(const char *nonterm);
|
||||
void add_rule_terminal(const char *term);
|
||||
void add_rule_terminal(unsigned char term);
|
||||
void add_rule();
|
||||
};
|
||||
|
||||
}
|
71
src/symbol.hpp
Normal file
71
src/symbol.hpp
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
Copyright (c) 2015, Matthias Schiffer <mschiffer@universe-factory.net>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
|
||||
namespace solar {
|
||||
|
||||
enum symbol_type_t {
|
||||
SYMBOL_TYPE_END,
|
||||
SYMBOL_TYPE_NONTERM,
|
||||
SYMBOL_TYPE_TERM,
|
||||
SYMBOL_TYPE_CHAR,
|
||||
};
|
||||
|
||||
struct symbol_t : public std::tuple<symbol_type_t, std::string> {
|
||||
symbol_t(symbol_type_t type, const char *value) : std::tuple<symbol_type_t, std::string>(type, value) {}
|
||||
|
||||
symbol_type_t get_type() const {
|
||||
return std::get<0>(*this);
|
||||
}
|
||||
|
||||
const std::string & get_value() const {
|
||||
return std::get<1>(*this);
|
||||
}
|
||||
|
||||
static symbol_t make_end() {
|
||||
return symbol_t(SYMBOL_TYPE_END, "");
|
||||
}
|
||||
|
||||
static symbol_t make_nonterm(const char *value) {
|
||||
return symbol_t(SYMBOL_TYPE_NONTERM, value);
|
||||
}
|
||||
|
||||
static symbol_t make_term(const char *value) {
|
||||
return symbol_t(SYMBOL_TYPE_TERM, value);
|
||||
}
|
||||
|
||||
static symbol_t make_char(unsigned char value) {
|
||||
char v[2] = {char(value), 0};
|
||||
return symbol_t(SYMBOL_TYPE_CHAR, v);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
Reference in a new issue