Add support for simple reduce actions
This commit is contained in:
parent
18f88fc863
commit
96dd0ebd26
9 changed files with 137 additions and 26 deletions
|
@ -36,7 +36,7 @@ std::set<item_t> generator_t::get_set(const std::string &nonterm) {
|
||||||
|
|
||||||
auto entries = nonterms.equal_range(nonterm);
|
auto entries = nonterms.equal_range(nonterm);
|
||||||
for (auto entry = entries.first; entry != entries.second; ++entry)
|
for (auto entry = entries.first; entry != entries.second; ++entry)
|
||||||
set.insert(rules[entry->second]);
|
set.insert(rules[entry->second].first);
|
||||||
|
|
||||||
return set;
|
return set;
|
||||||
}
|
}
|
||||||
|
@ -117,9 +117,9 @@ void generator_t::generate_itemsets() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
generator_t::generator_t(const std::vector<item_t> &rules0) : rules(rules0) {
|
generator_t::generator_t(const std::vector<std::pair<item_t, std::string>> &rules0) : rules(rules0) {
|
||||||
for (size_t i = 0; i < rules.size(); i++) {
|
for (size_t i = 0; i < rules.size(); i++) {
|
||||||
item_t rule = rules[i];
|
item_t rule = rules[i].first;
|
||||||
|
|
||||||
nonterms.emplace(rule.get_lhs(), i);
|
nonterms.emplace(rule.get_lhs(), i);
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ namespace solar {
|
||||||
|
|
||||||
class generator_t {
|
class generator_t {
|
||||||
private:
|
private:
|
||||||
std::vector<item_t> rules;
|
std::vector<std::pair<item_t, std::string>> rules;
|
||||||
std::map<item_t, size_t> rule_ids;
|
std::map<item_t, size_t> rule_ids;
|
||||||
std::multimap<std::string, size_t> nonterms;
|
std::multimap<std::string, size_t> nonterms;
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ public:
|
||||||
return itemsets.size();
|
return itemsets.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<item_t> & get_rules() const {
|
const std::vector<std::pair<item_t, std::string>> & get_rules() const {
|
||||||
return rules;
|
return rules;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ public:
|
||||||
return gotos;
|
return gotos;
|
||||||
}
|
}
|
||||||
|
|
||||||
generator_t(const std::vector<item_t> &rules0);
|
generator_t(const std::vector<std::pair<item_t, std::string>> &rules0);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
84
src/lex.cpp
84
src/lex.cpp
|
@ -243,6 +243,85 @@ int lex_t::lex_keyword(parser_value_t *value) {
|
||||||
return ret->token;
|
return ret->token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Reports that the input ended while a code block was still open.
 *
 * If the underlying stream is flagged with an error, the failure is delegated
 * to io_error(); otherwise value->error is set to a fixed message and -1 is
 * returned.
 */
int lex_t::unterminated_block(parser_value_t *value) {
	if (!ferror(file)) {
		value->error = "unterminated code block";
		return -1;
	}

	/* The read stopped because of a stream error, not a missing '}' */
	return io_error(value);
}
|
||||||
|
|
||||||
|
/*
 * Lexes a brace-delimited code block (a reduce action) and returns TOK_BLOCK.
 *
 * lex() dispatches here on '{'. Characters are collected until the matching
 * '}' is found; nested braces are counted, while braces inside comments,
 * string literals and character literals are ignored. The text between the
 * outer braces (exclusive) is stored in value->str as a heap-allocated,
 * NUL-terminated copy (strndup); the caller is responsible for free()ing it.
 *
 * On failure, value->error is set and a negative value is returned:
 *  - input ends before the closing brace -> unterminated_block()
 *  - an allocation fails                 -> "out of memory", -1
 */
int lex_t::lex_block(parser_value_t *value) {
	size_t depth = 0;            /* number of currently open nested '{' */
	bool line_comment = false;
	bool block_comment = false;
	char quote = 0;              /* '"' or '\'' while inside a literal, else 0 */
	bool escaped = false;        /* previous char in a literal was an unescaped '\\' */

	size_t pos = 0;
	size_t len = 1024;
	char *buf = static_cast<char*>(std::malloc(len));
	if (!buf) {
		value->error = "out of memory";
		return -1;
	}

	char prev = 0;

	while (true) {
		if (!next(true)) {
			std::free(buf);
			return unterminated_block(value);
		}

		char cur = current();
		/* What `prev` becomes for the next iteration. It is cleared on comment
		 * delimiters so that their characters cannot be reused as part of
		 * another delimiter (e.g. the '/' of "/*" closing the comment in
		 * "/ * /", or the '/' of "* /" opening a line comment). */
		char next_prev = cur;

		if (line_comment) {
			if (cur == '\n' || cur == '\r')
				line_comment = false;
		}
		else if (block_comment) {
			if (prev == '*' && cur == '/') {
				block_comment = false;
				next_prev = 0;
			}
		}
		else if (quote) {
			/* Track escapes explicitly: looking only at the previous
			 * character would misread "\\" followed by the closing quote. */
			if (escaped)
				escaped = false;
			else if (cur == '\\')
				escaped = true;
			else if (cur == quote)
				quote = 0;
		}
		else if (cur == '{') {
			depth++;
		}
		else if (cur == '}') {
			if (!depth)
				break;  /* matching brace of the block itself; not stored */

			depth--;
		}
		else if (cur == '"' || cur == '\'') {
			quote = cur;
		}
		else if (prev == '/' && cur == '/') {
			line_comment = true;
		}
		else if (prev == '/' && cur == '*') {
			block_comment = true;
			next_prev = 0;
		}

		if (pos >= len) {
			/* Keep the old pointer until realloc succeeds so that it can
			 * still be released on failure. */
			char *grown = static_cast<char*>(std::realloc(buf, len * 2));
			if (!grown) {
				std::free(buf);
				value->error = "out of memory";
				return -1;
			}

			buf = grown;
			len *= 2;
		}

		buf[pos++] = cur;
		prev = next_prev;
	}

	value->str = strndup(buf, pos);
	std::free(buf);

	if (!value->str) {
		value->error = "out of memory";
		return -1;
	}

	/* Same trailing sequence as before: step past the closing '}' and
	 * consume the lexed input. */
	next(true);
	consume(true);

	return TOK_BLOCK;
}
|
||||||
|
|
||||||
int lex_t::lex_symbol(parser_value_t *value, bool terminal) {
|
int lex_t::lex_symbol(parser_value_t *value, bool terminal) {
|
||||||
if (needspace)
|
if (needspace)
|
||||||
return syntax_error(value);
|
return syntax_error(value);
|
||||||
|
@ -293,8 +372,6 @@ int lex_t::lex(parser_value_t *value) {
|
||||||
|
|
||||||
case ';':
|
case ';':
|
||||||
case ':':
|
case ':':
|
||||||
case '{':
|
|
||||||
case '}':
|
|
||||||
case '|':
|
case '|':
|
||||||
case '=':
|
case '=':
|
||||||
token = current();
|
token = current();
|
||||||
|
@ -341,6 +418,9 @@ int lex_t::lex(parser_value_t *value) {
|
||||||
|
|
||||||
return TOK_CHAR;
|
return TOK_CHAR;
|
||||||
|
|
||||||
|
case '{':
|
||||||
|
return lex_block(value);
|
||||||
|
|
||||||
//case '"':
|
//case '"':
|
||||||
//return lex_string(value);
|
//return lex_string(value);
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ private:
|
||||||
size_t start;
|
size_t start;
|
||||||
size_t end;
|
size_t end;
|
||||||
size_t tok_len;
|
size_t tok_len;
|
||||||
char buffer[65536];
|
char buffer[1024];
|
||||||
|
|
||||||
|
|
||||||
bool advance();
|
bool advance();
|
||||||
|
@ -65,13 +65,13 @@ private:
|
||||||
int io_error(parser_value_t *value);
|
int io_error(parser_value_t *value);
|
||||||
int syntax_error(parser_value_t *value);
|
int syntax_error(parser_value_t *value);
|
||||||
int consume_comment(parser_value_t *value);
|
int consume_comment(parser_value_t *value);
|
||||||
|
int unterminated_block(parser_value_t *value);
|
||||||
//int unterminated_string(parser_value_t *value);
|
//int unterminated_string(parser_value_t *value);
|
||||||
|
|
||||||
int lex_string(parser_value_t *value);
|
//int lex_string(parser_value_t *value);
|
||||||
int lex_address(parser_value_t *value);
|
|
||||||
int lex_float(parser_value_t *value);
|
|
||||||
int lex_number(parser_value_t *value);
|
int lex_number(parser_value_t *value);
|
||||||
int lex_keyword(parser_value_t *value);
|
int lex_keyword(parser_value_t *value);
|
||||||
|
int lex_block(parser_value_t *value);
|
||||||
int lex_symbol(parser_value_t *value, bool terminal);
|
int lex_symbol(parser_value_t *value, bool terminal);
|
||||||
|
|
||||||
char current() {
|
char current() {
|
||||||
|
|
|
@ -64,8 +64,8 @@ void output_t::emit_tokens() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void output_t::emit_token_value() {
|
void output_t::emit_token_value() {
|
||||||
std::fprintf(source_file, "typedef struct %stoken_value {\n", prefix());
|
std::fprintf(header_file, "typedef struct %stoken_value {\n", prefix());
|
||||||
std::fprintf(source_file, "} %stoken_value_t;\n\n", prefix());
|
std::fprintf(header_file, "} %stoken_value_t;\n\n", prefix());
|
||||||
}
|
}
|
||||||
|
|
||||||
void output_t::emit_header() {
|
void output_t::emit_header() {
|
||||||
|
@ -75,6 +75,22 @@ void output_t::emit_header() {
|
||||||
std::fprintf(header_file, "typedef struct %scontext %scontext_t;\n", prefix(), prefix());
|
std::fprintf(header_file, "typedef struct %scontext %scontext_t;\n", prefix(), prefix());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Writes the C function that carries the user-supplied action for one rule
 * to the generated source file.
 *
 * The function is named "<prefix>reduce_<rule_id>", takes no arguments and
 * returns void; `action` is pasted verbatim between its braces.
 */
void output_t::emit_reduction(unsigned rule_id, const std::string &action) {
	std::fprintf(
		source_file,
		"void %sreduce_%u(void) {%s}\n\n",
		prefix(), rule_id, action.c_str()
	);
}
|
||||||
|
|
||||||
|
void output_t::emit_reductions() {
|
||||||
|
const auto &rules = generator->get_rules();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < rules.size(); i++) {
|
||||||
|
if (!rules[i].second.empty())
|
||||||
|
emit_reduction(i, rules[i].second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void output_t::emit_state_shift(unsigned i) {
|
void output_t::emit_state_shift(unsigned i) {
|
||||||
std::fprintf(source_file, "\t\t\tswitch (token) {\n");
|
std::fprintf(source_file, "\t\t\tswitch (token) {\n");
|
||||||
|
|
||||||
|
@ -103,10 +119,13 @@ void output_t::emit_state_shift(unsigned i) {
|
||||||
std::fprintf(source_file, "\t\t\t}\n");
|
std::fprintf(source_file, "\t\t\t}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void output_t::emit_state_reduce(const item_t &item) {
|
void output_t::emit_state_reduce(const item_t &item, int rule_id) {
|
||||||
if (item.get_rhs().size())
|
if (item.get_rhs().size())
|
||||||
std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(item.get_rhs().size()));
|
std::fprintf(source_file, "\t\t\tparser->top -= %u;\n", unsigned(item.get_rhs().size()));
|
||||||
|
|
||||||
|
if (rule_id >= 0)
|
||||||
|
std::fprintf(source_file, "\t\t\t%sreduce_%i();\n", prefix(), rule_id);
|
||||||
|
|
||||||
std::vector<std::pair<unsigned, unsigned>> gotos;
|
std::vector<std::pair<unsigned, unsigned>> gotos;
|
||||||
|
|
||||||
for (size_t i = 0; i < generator->get_state_count(); i++) {
|
for (size_t i = 0; i < generator->get_state_count(); i++) {
|
||||||
|
@ -142,10 +161,13 @@ void output_t::emit_state(unsigned i) {
|
||||||
std::fprintf(source_file, "\t\tcase %u:\n", i);
|
std::fprintf(source_file, "\t\tcase %u:\n", i);
|
||||||
|
|
||||||
auto it = generator->get_reductions().find(i);
|
auto it = generator->get_reductions().find(i);
|
||||||
if (it == generator->get_reductions().end())
|
if (it == generator->get_reductions().end()) {
|
||||||
emit_state_shift(i);
|
emit_state_shift(i);
|
||||||
else
|
}
|
||||||
emit_state_reduce(generator->get_rules()[it->second]);
|
else {
|
||||||
|
const auto &rule = generator->get_rules()[it->second];
|
||||||
|
emit_state_reduce(rule.first, rule.second.empty() ? -1 : it->second);
|
||||||
|
}
|
||||||
|
|
||||||
std::fprintf(source_file, "\t\t\tbreak;\n\n");
|
std::fprintf(source_file, "\t\t\tbreak;\n\n");
|
||||||
}
|
}
|
||||||
|
@ -160,10 +182,12 @@ void output_t::emit_source() {
|
||||||
std::fprintf(source_file, "\tunsigned state;\n");
|
std::fprintf(source_file, "\tunsigned state;\n");
|
||||||
std::fprintf(source_file, "} %scontext_state_t;\n\n", prefix());
|
std::fprintf(source_file, "} %scontext_state_t;\n\n", prefix());
|
||||||
|
|
||||||
std::fprintf(source_file, "typedef struct %scontext {\n", prefix());
|
std::fprintf(source_file, "struct %scontext {\n", prefix());
|
||||||
std::fprintf(source_file, "\tunsigned top;\n");
|
std::fprintf(source_file, "\tunsigned top;\n");
|
||||||
std::fprintf(source_file, "\t%scontext_state_t stack[%u];\n", prefix(), stack_size);
|
std::fprintf(source_file, "\t%scontext_state_t stack[%u];\n", prefix(), stack_size);
|
||||||
std::fprintf(source_file, "} %scontext_t;\n\n", prefix());
|
std::fprintf(source_file, "};\n\n");
|
||||||
|
|
||||||
|
emit_reductions();
|
||||||
|
|
||||||
std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value) {\n", prefix(), prefix(), prefix());
|
std::fprintf(source_file, "int %spush(%scontext_t *parser, int token, const %stoken_value_t *value) {\n", prefix(), prefix(), prefix());
|
||||||
std::fprintf(source_file, "\twhile (1) {\n");
|
std::fprintf(source_file, "\twhile (1) {\n");
|
||||||
|
|
|
@ -59,8 +59,10 @@ private:
|
||||||
void emit_token_value();
|
void emit_token_value();
|
||||||
void emit_header();
|
void emit_header();
|
||||||
|
|
||||||
|
void emit_reduction(unsigned rule_id, const std::string &action);
|
||||||
|
void emit_reductions();
|
||||||
void emit_state_shift(unsigned i);
|
void emit_state_shift(unsigned i);
|
||||||
void emit_state_reduce(const item_t &item);
|
void emit_state_reduce(const item_t &item, int rule_id);
|
||||||
void emit_state(unsigned i);
|
void emit_state(unsigned i);
|
||||||
void emit_states();
|
void emit_states();
|
||||||
void emit_source();
|
void emit_source();
|
||||||
|
|
|
@ -98,6 +98,11 @@ int parser_push(parser_t *parser, int token, const parser_value_t *value, parser
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case TOK_BLOCK:
|
case TOK_BLOCK:
|
||||||
|
state->add_rule(value->str);
|
||||||
|
free(value->str);
|
||||||
|
parser->state = STATE_INIT;
|
||||||
|
return 1;
|
||||||
|
|
||||||
case ';':
|
case ';':
|
||||||
state->add_rule();
|
state->add_rule();
|
||||||
parser->state = STATE_INIT;
|
parser->state = STATE_INIT;
|
||||||
|
|
|
@ -52,8 +52,8 @@ void parser_state_t::add_rule_terminal(unsigned char term) {
|
||||||
current.get_rhs().emplace_back(symbol_t::make_char(term));
|
current.get_rhs().emplace_back(symbol_t::make_char(term));
|
||||||
}
|
}
|
||||||
|
|
||||||
void parser_state_t::add_rule() {
|
void parser_state_t::add_rule(const std::string &action) {
|
||||||
rules.emplace_back(current);
|
rules.emplace_back(current, action);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,14 +33,14 @@ namespace solar {
|
||||||
|
|
||||||
class parser_state_t {
|
class parser_state_t {
|
||||||
private:
|
private:
|
||||||
std::vector<item_t> rules;
|
std::vector<std::pair<item_t, std::string>> rules;
|
||||||
|
|
||||||
item_t current;
|
item_t current;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
parser_state_t() : current("") {}
|
parser_state_t() : current("") {}
|
||||||
|
|
||||||
const std::vector<item_t> & get_rules() const {
|
const std::vector<std::pair<item_t, std::string>> & get_rules() const {
|
||||||
return rules;
|
return rules;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ public:
|
||||||
void add_rule_nonterminal(const char *nonterm);
|
void add_rule_nonterminal(const char *nonterm);
|
||||||
void add_rule_terminal(const char *term);
|
void add_rule_terminal(const char *term);
|
||||||
void add_rule_terminal(unsigned char term);
|
void add_rule_terminal(unsigned char term);
|
||||||
void add_rule();
|
void add_rule(const std::string &action = "");
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue