From 91569105085ac7d1d3a1f5fc0f1adec83c81413b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 17 Mar 2014 19:21:24 +0100 Subject: Remove flex scanner, use scanner from fastd (with lots of adjustions) --- CMakeLists.txt | 1 - FindFLEX.cmake | 120 -------------- examples/babel_test.mmss | 6 +- mmss/CMakeLists.txt | 3 +- mmss/config.cpp | 16 +- mmss/config.l | 170 -------------------- mmss/config.y | 6 +- mmss/lex.cpp | 399 +++++++++++++++++++++++++++++++++++++++++++++++ mmss/lex.hpp | 81 ++++++++++ 9 files changed, 493 insertions(+), 309 deletions(-) delete mode 100644 FindFLEX.cmake delete mode 100644 mmss/config.l create mode 100644 mmss/lex.cpp create mode 100644 mmss/lex.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 08a9052..6509ceb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,6 @@ project(GMRF C CXX) set(CMAKE_MODULE_PATH ${GMRF_SOURCE_DIR}) find_package(BISON 2.5 REQUIRED) -find_package(FLEX REQUIRED) add_subdirectory(mmss) add_subdirectory(mmss-protocol) diff --git a/FindFLEX.cmake b/FindFLEX.cmake deleted file mode 100644 index fe83369..0000000 --- a/FindFLEX.cmake +++ /dev/null @@ -1,120 +0,0 @@ -#============================================================================= -# Copyright 2009 Kitware, Inc. -# Copyright 2006 Tristan Carel -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the names of Kitware, Inc., the Insight Software Consortium, -# nor the names of their contributors may be used to endorse or promote -# products derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# ------------------------------------------------------------------------------ -# -# The above copyright and license notice applies to distributions of -# CMake in source and binary form. Some source files contain additional -# notices of original copyright by their contributors; see each source -# for details. Third-party software packages supplied with CMake under -# compatible licenses provide their own copyright notices documented in -# corresponding subdirectories. -#============================================================================= - -FIND_PROGRAM(FLEX_EXECUTABLE flex DOC "path to the flex executable") -MARK_AS_ADVANCED(FLEX_EXECUTABLE) - -FIND_LIBRARY(FL_LIBRARY NAMES fl - DOC "Path to the fl library") - -FIND_PATH(FLEX_INCLUDE_DIR FlexLexer.h - DOC "Path to the flex headers") - -MARK_AS_ADVANCED(FL_LIBRARY FLEX_INCLUDE_DIR) - -SET(FLEX_INCLUDE_DIRS ${FLEX_INCLUDE_DIR}) -SET(FLEX_LIBRARIES ${FL_LIBRARY}) - -IF(FLEX_EXECUTABLE) - - EXECUTE_PROCESS(COMMAND ${FLEX_EXECUTABLE} --version - OUTPUT_VARIABLE FLEX_version_output - ERROR_VARIABLE FLEX_version_error - RESULT_VARIABLE FLEX_version_result - OUTPUT_STRIP_TRAILING_WHITESPACE) - IF(NOT ${FLEX_version_result} EQUAL 0) - IF(FLEX_FIND_REQUIRED) - MESSAGE(SEND_ERROR "Command \"${FLEX_EXECUTABLE} --version\" failed with output:\n${FLEX_version_output}\n${FLEX_version_error}") - ELSE() - MESSAGE("Command \"${FLEX_EXECUTABLE} --version\" failed with output:\n${FLEX_version_output}\n${FLEX_version_error}\nFLEX_VERSION will not be available") - ENDIF() - ELSE() - STRING(REGEX REPLACE "^flex (.*)$" "\\1" - FLEX_VERSION "${FLEX_version_output}") - ENDIF() - - #============================================================ - # FLEX_TARGET (public macro) - #============================================================ - # - MACRO(FLEX_TARGET Name Input Output) - SET(FLEX_TARGET_usage "FLEX_TARGET( [COMPILE_FLAGS ]") - IF(${ARGC} GREATER 3) - IF(${ARGC} EQUAL 5) - IF("${ARGV3}" STREQUAL "COMPILE_FLAGS") - SET(FLEX_EXECUTABLE_opts "${ARGV4}") - SEPARATE_ARGUMENTS(FLEX_EXECUTABLE_opts) - ELSE() - MESSAGE(SEND_ERROR ${FLEX_TARGET_usage}) - ENDIF() - ELSE() - MESSAGE(SEND_ERROR ${FLEX_TARGET_usage}) - ENDIF() - ENDIF() - - STRING(REGEX REPLACE "^(.*)(\\.[^.]*)$" "\\2" _fileext "${Output}") - STRING(REPLACE "c" "h" _fileext ${_fileext}) - STRING(REGEX REPLACE "^(.*)(\\.[^.]*)$" "\\1${_fileext}" - OutputHeader "${Output}") - - ADD_CUSTOM_COMMAND(OUTPUT ${Output} ${OutputHeader} - COMMAND ${FLEX_EXECUTABLE} - ARGS ${FLEX_EXECUTABLE_opts} -o${Output} --header-file=${OutputHeader} ${Input} - DEPENDS ${Input} - COMMENT "[FLEX][${Name}] Building scanner with flex ${FLEX_VERSION}" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - - SET(FLEX_${Name}_DEFINED TRUE) - SET(FLEX_${Name}_OUTPUTS ${Output} ${OutputHeader}) - SET(FLEX_${Name}_OUTPUT_HEADER ${OutputHeader}) - SET(FLEX_${Name}_INPUT ${Input}) - SET(FLEX_${Name}_COMPILE_FLAGS ${FLEX_EXECUTABLE_opts}) - ENDMACRO(FLEX_TARGET) - #============================================================ - - -ENDIF(FLEX_EXECUTABLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS(FLEX REQUIRED_VARS FLEX_EXECUTABLE - VERSION_VAR FLEX_VERSION) - -# FindFLEX.cmake ends here diff --git a/examples/babel_test.mmss b/examples/babel_test.mmss index ca0a069..fad6f0a 100644 --- a/examples/babel_test.mmss +++ b/examples/babel_test.mmss @@ -1,15 +1,15 @@ protocol "babel" load "./libmmss_proto_babel.so" default; network "net0" { - etx min 1.0 max 5.0 sine period 15m; + etx min 1.0 max 5.0 sine period 15 m; } network "net1" { - etx min 1.0 max 5.0 sine period 15m phase 5m; + etx min 1.0 max 5.0 sine period 15 m phase 5 m; } network "net2" { - etx min 1.0 max 5.0 sine period 15m phase 10m; + etx min 1.0 max 5.0 sine period 15 m phase 10 m; } node "node1" { diff --git a/mmss/CMakeLists.txt b/mmss/CMakeLists.txt index c71b9b1..694a49d 100644 --- a/mmss/CMakeLists.txt +++ b/mmss/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories(${GMRF_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) set_directory_properties(PROPERTIES COMPILE_DEFINITIONS _GNU_SOURCE) -FLEX_TARGET(mmss_config_lex config.l ${CMAKE_CURRENT_BINARY_DIR}/config.ll.cpp) BISON_TARGET(mmss_config_parse config.y ${CMAKE_CURRENT_BINARY_DIR}/config.yy.cpp) add_executable(mmss @@ -10,10 +9,10 @@ add_executable(mmss event.cpp gmrf.cpp iface.cpp + lex.cpp mmss.cpp network.cpp protocol.cpp - ${FLEX_mmss_config_lex_OUTPUTS} ${BISON_mmss_config_parse_OUTPUTS} ) target_link_libraries(mmss dl) diff --git a/mmss/config.cpp b/mmss/config.cpp index 475b1c5..394ca5b 100644 --- a/mmss/config.cpp +++ b/mmss/config.cpp @@ -25,10 +25,10 @@ #include "context.hpp" +#include "lex.hpp" #include "network.hpp" #include "node.hpp" #include "protocol.hpp" -#include #include #include @@ -155,7 +155,6 @@ std::shared_ptr config_t::read(context_t *mmss, const char *file char *filename2 = NULL; char *dir = NULL; FILE *file; - yyscan_t scanner; mmss_config_pstate *ps; int token; YYSTYPE token_val; @@ -164,9 +163,6 @@ std::shared_ptr config_t::read(context_t *mmss, const char *file std::stack strings; - mmss_config_yylex_init(&scanner); - ps = mmss_config_pstate_new(); - if (!filename) { file = stdin; } @@ -174,12 +170,13 @@ std::shared_ptr config_t::read(context_t *mmss, const char *file file = fopen(filename, "r"); if (!file) { mmss->logf(LOG_ERR, "can't open config file `%s': %s", filename, strerror(errno)); - conf.reset(); - goto end_free; + return std::shared_ptr(); } } - mmss_config_yyset_in(file, scanner); + lex_t scanner(file); + ps = mmss_config_pstate_new(); + if (filename) { filename2 = strdup(filename); @@ -193,7 +190,7 @@ std::shared_ptr config_t::read(context_t *mmss, const char *file } do { - token = mmss_config_yylex(&token_val, &loc, scanner); + token = scanner.lex(&token_val, &loc); if (token < 0) { mmss->logf(LOG_ERR, "config error: %s at %s:%i:%i", token_val.error, filename, loc.first_line, loc.first_column); @@ -218,7 +215,6 @@ std::shared_ptr config_t::read(context_t *mmss, const char *file } mmss_config_pstate_delete(ps); - mmss_config_yylex_destroy(scanner); if(chdir(oldcwd)) mmss->logf(LOG_ERR, "can't chdir to `%s': %s", oldcwd, strerror(errno)); diff --git a/mmss/config.l b/mmss/config.l deleted file mode 100644 index d81ba20..0000000 --- a/mmss/config.l +++ /dev/null @@ -1,170 +0,0 @@ -/* - Copyright (c) 2013, Matthias Schiffer - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -%option prefix="mmss_config_yy" -%option noyywrap -%option nounput -%option noinput -%option bison-bridge -%option bison-locations -%option reentrant -%option warn - - -%top { - #include - - #include - #include -} - -%{ - /* register is deprecated in C++11 */ - #define register - - #pragma GCC diagnostic ignored "-Wunused-parameter" -%} - -%s NEEDSPACE -%s STRING -%s COMMENT - -%% -%{ - #define UPDATE_LOCATION do { \ - yylloc->first_line = yylloc->last_line; \ - yylloc->first_column = yylloc->last_column+1; \ - yylloc->last_column += yyleng; \ - } while (0) - - #define TOKEN(tok) do { UPDATE_LOCATION; BEGIN(NEEDSPACE); return tok; } while (0) -%} - -{ -[0-9]+ { UPDATE_LOCATION; yylval->num = std::atoi(yytext); BEGIN(NEEDSPACE); return TOK_INTEGER; } -[0-9]*\.[0-9]+ { UPDATE_LOCATION; yylval->fnum = float(std::atof(yytext)); BEGIN(NEEDSPACE); return TOK_FLOAT; } - -yes { TOKEN(TOK_YES); } -no { TOKEN(TOK_NO); } -network { TOKEN(TOK_NETWORK); } -protocol { TOKEN(TOK_PROTOCOL); } -node { TOKEN(TOK_NODE); } -load { TOKEN(TOK_LOAD); } -default { TOKEN(TOK_DEFAULT); } -interface { TOKEN(TOK_INTERFACE); } -address { TOKEN(TOK_ADDRESS); } -etx { TOKEN(TOK_ETX); } -const { TOKEN(TOK_CONST); } -min { TOKEN(TOK_MIN); } -max { TOKEN(TOK_MAX); } -sine { TOKEN(TOK_SINE); } -period { TOKEN(TOK_PERIOD); } -phase { TOKEN(TOK_PHASE); } - -([[:xdigit:]]{2}:){7}[[:xdigit:]]{2} { - UPDATE_LOCATION; - - assert(std::sscanf(yytext, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", - &yylval->addr.d[0], &yylval->addr.d[1], &yylval->addr.d[2], &yylval->addr.d[3], - &yylval->addr.d[4], &yylval->addr.d[5], &yylval->addr.d[6], &yylval->addr.d[7]) == 8); - - return TOK_GMRF_ADDRESS; -} - -[;:\{\}] { UPDATE_LOCATION; return yytext[0]; } - -[ \t] { yylloc->last_column++; } -\n { yylloc->last_column = 0; yylloc->last_line++; } -\r ; -} - -{ -s { TOKEN(TOK_S); } -m { TOKEN(TOK_M); } -h { TOKEN(TOK_H); } -days { TOKEN(TOK_DAYS); } -} - -{ -[;:\{\}] { UPDATE_LOCATION; BEGIN(INITIAL); return yytext[0]; } - -[ \t] { yylloc->last_column++; BEGIN(INITIAL); } -\n { yylloc->last_column = 0; yylloc->last_line++; BEGIN(INITIAL); } -\r ; -} - -\" { UPDATE_LOCATION; BEGIN(STRING); } -[^"\\\n\r] { yylloc->last_column++; yymore(); } -\n { yylloc->last_line++; yylloc->last_column = 0; yymore(); } -\r { yymore(); } -\\. { yylloc->last_column+=2; yymore(); } -\\\n { yylloc->last_line++; yylloc->last_column = 0; yymore(); } -\" { - size_t i, esc = 0; - - for (i = 0; i < yyleng; i++) { - if (yytext[i] == '\\') { - i++; - if (yytext[i] == '\n') { - esc+=2; - } - else { - yytext[i-esc-1] = yytext[i]; - esc++; - } - } - else if(esc) { - yytext[i-esc] = yytext[i]; - } - } - yytext[yyleng-esc-1] = 0; - yylval->str = strdup(yytext); - BEGIN(NEEDSPACE); - yylloc->last_column++; - return TOK_STRING; - - } - -#.* { yylloc->last_column += yyleng; } -\/\/.* { yylloc->last_column += yyleng; } - -\/\* { UPDATE_LOCATION; BEGIN(COMMENT); } -\*\/ { yylloc->last_column += yyleng; BEGIN(INITIAL); } -[^\n\r] { yylloc->last_column++; } -\n { yylloc->last_line++; yylloc->last_column = 0; } -\r {} - -. { - yylloc->first_line = yylloc->last_line; - yylloc->first_column = yylloc->last_column+1; - yylval->error = "syntax error"; - return -1; - } - -<> { return 0; } -<> { yylval->error = "unterminated block comment"; return -1; } -<> { yylval->error = "unterminated string"; return -1; } -%% diff --git a/mmss/config.y b/mmss/config.y index d40046b..4b4647a 100644 --- a/mmss/config.y +++ b/mmss/config.y @@ -40,7 +40,7 @@ } %union { - int num; + uint64_t uint64; float fnum; bool boolean; char *str; @@ -49,7 +49,7 @@ const char *error; } -%token TOK_INTEGER +%token TOK_UINT %token TOK_FLOAT %token TOK_STRING %token TOK_GMRF_ADDRESS @@ -157,7 +157,7 @@ node_interface: TOK_STRING TOK_NETWORK TOK_STRING TOK_ADDRESS TOK_GMRF_ADDRESS { ; float: TOK_FLOAT { $$ = $1; } - | TOK_INTEGER { $$ = $1; } + | TOK_UINT { $$ = $1; } ; boolean: TOK_YES { $$ = true; } diff --git a/mmss/lex.cpp b/mmss/lex.cpp new file mode 100644 index 0000000..0ef205e --- /dev/null +++ b/mmss/lex.cpp @@ -0,0 +1,399 @@ +/* + Copyright (c) 2012-2014, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "lex.hpp" + +#include + + +#define array_size(array) (sizeof(array)/sizeof((array)[0])) + + +namespace MMSS { + + +struct keyword_t { + const char *keyword; + int token; +}; + +/* the keyword list must be sorted */ +static const keyword_t keywords[] = { + { "address", TOK_ADDRESS }, + { "const", TOK_CONST }, + { "days", TOK_DAYS }, + { "default", TOK_DEFAULT }, + { "etx", TOK_ETX }, + { "h", TOK_H }, + { "interface", TOK_INTERFACE }, + { "load", TOK_LOAD }, + { "m", TOK_M }, + { "max", TOK_MAX }, + { "min", TOK_MIN }, + { "network", TOK_NETWORK }, + { "no", TOK_NO }, + { "node", TOK_NODE }, + { "period", TOK_PERIOD }, + { "phase", TOK_PHASE }, + { "protocol", TOK_PROTOCOL }, + { "s", TOK_S }, + { "sine", TOK_SINE }, + { "yes", TOK_YES }, +}; + +static int compare_keywords(const void *v1, const void *v2) { + const keyword_t *k1 = static_cast(v1), *k2 = static_cast(v2); + return std::strcmp(k1->keyword, k2->keyword); +} + + +bool lex_t::advance() { + if (start > 0) { + std::memmove(buffer, buffer+start, end - start); + end -= start; + start = 0; + } + + if (end == sizeof(buffer)) + return false; + + size_t l = std::fread(buffer+end, 1, sizeof(buffer) - end, file); + + end += l; + return l; +} + +bool lex_t::next(YYLTYPE *yylloc, bool move) { + if (start + tok_len >= end) + return false; + + if (current() == '\n') { + yylloc->last_column = 0; + yylloc->last_line++; + } + else { + yylloc->last_column++; + } + + if (move) + start++; + else + tok_len++; + + + if (start + tok_len >= end) + return advance(); + + return true; +} + +void lex_t::consume(bool consume_needspace) { + start += tok_len; + tok_len = 0; + + needspace = consume_needspace; +} + +int lex_t::io_error(YYSTYPE *yylval) { + yylval->error = "I/O error"; + return -1; +} + +int lex_t::syntax_error(YYSTYPE *yylval) { + if (ferror(file)) + return io_error(yylval); + + yylval->error = "syntax error"; + return -1; +} + +int lex_t::consume_comment(YYSTYPE *yylval, YYLTYPE *yylloc) { + char prev = 0; + + while (next(yylloc, true)) { + if (prev == '*' && current() == '/') { + next(yylloc, true); + consume(false); + return 0; + } + + prev = current(); + } + + if (ferror(file)) + return io_error(yylval); + + yylval->error = "unterminated block comment"; + return -1; +} + +int lex_t::unterminated_string(YYSTYPE *yylval) { + if (ferror(file)) + return io_error(yylval); + + yylval->error = "unterminated string"; + return -1; +} + +int lex_t::parse_string(YYSTYPE *yylval, YYLTYPE *yylloc) { + char *buf = NULL; + size_t len = 1024; + size_t pos = 0; + + if (needspace) + return syntax_error(yylval); + + buf = static_cast(std::malloc(len)); + + while (true) { + if (!next(yylloc, true)) { + free(buf); + return unterminated_string(yylval); + } + + char cur = current(); + + if (cur == '"') + break; + + if (cur == '\\') { + if (!next(yylloc, true)) { + free(buf); + return unterminated_string(yylval); + } + + cur = current(); + + if (cur == '\n') + continue; + } + + if (pos >= len) { + len *= 2; + buf = static_cast(std::realloc(buf, len)); + } + + buf[pos++] = cur; + } + + yylval->str = strndup(buf, pos); + std::free(buf); + + next(yylloc, true); + consume(true); + + return TOK_STRING; +} + +int lex_t::parse_address(YYSTYPE *yylval, YYLTYPE *yylloc) { + if (needspace) + return syntax_error(yylval); + + while (next(yylloc, false)) { + char cur = current(); + + if (!((cur >= '0' && cur <= '9') || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F') || (cur == ':'))) + break; + } + + char *token = get_token(); + int len; + bool ok = (std::strlen(token) == 23) && + (std::sscanf(token, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx:%hhx:%hhx%n", + &yylval->addr.d[0], &yylval->addr.d[1], &yylval->addr.d[2], &yylval->addr.d[3], + &yylval->addr.d[4], &yylval->addr.d[5], &yylval->addr.d[6], &yylval->addr.d[7], + &len) >= 8) && + (len == 23); + + std::free(token); + + if (!ok) + return syntax_error(yylval); + + consume(true); + + return TOK_GMRF_ADDRESS; +} + +int lex_t::parse_float(YYSTYPE *yylval, YYLTYPE *yylloc) { + while (next(yylloc, false)) { + char cur = current(); + + if (!(cur >= '0' && cur <= '9')) + break; + } + + char *endptr, *token = get_token(); + yylval->fnum = std::strtof(token, &endptr); + + bool ok = !*endptr; + free(token); + + if (!ok) + return syntax_error(yylval); + + consume(true); + + return TOK_FLOAT; +} + +int lex_t::parse_number(YYSTYPE *yylval, YYLTYPE *yylloc) { + bool digitonly = true; + + if (needspace) + return syntax_error(yylval); + + while (next(yylloc, false)) { + char cur = current(); + + if (cur == ':') + return parse_address(yylval, yylloc); + + if (cur == '.' && digitonly) + return parse_float(yylval, yylloc); + + if (!(cur >= '0' && cur <= '9')) { + if (cur == 'x' || (cur >= 'a' && cur <= 'f') || (cur >= 'A' && cur <= 'F')) + digitonly = false; + else + break; + } + } + + char *endptr, *token = get_token(); + yylval->uint64 = std::strtoull(token, &endptr, 0); + + bool ok = !*endptr; + free(token); + + if (!ok) + return syntax_error(yylval); + + consume(true); + + return TOK_UINT; +} + +int lex_t::parse_keyword(YYSTYPE *yylval, YYLTYPE *yylloc) { + if (needspace) + return syntax_error(yylval); + + while (next(yylloc, false)) { + char cur = current(); + + if (!((cur >= 'a' && cur <= 'z') || (cur >= '0' && cur <= '9') || cur == '-')) + break; + } + + char *token = get_token(); + const keyword_t key = { .keyword = token }; + const keyword_t *ret = static_cast(bsearch(&key, keywords, array_size(keywords), sizeof(keyword_t), compare_keywords)); + free(token); + + if (!ret) + return syntax_error(yylval); + + consume(true); + + return ret->token; +} + +lex_t::lex_t(FILE *file0) : file(file0), needspace(false), start(0), end(0), tok_len(0) { + advance(); +} + +int lex_t::lex(YYSTYPE *yylval, YYLTYPE *yylloc) { + int token; + + while (end > start) { + yylloc->first_line = yylloc->last_line; + yylloc->first_column = yylloc->last_column+1; + + switch (current()) { + case ' ': + case '\n': + case '\t': + case '\r': + next(yylloc, true); + consume(false); + continue; + + case ';': + case ':': + case '{': + case '}': + token = current(); + next(yylloc, true); + consume(false); + return token; + + case '/': + if (!next(yylloc, true)) + return syntax_error(yylval); + + if (current() == '*') { + token = consume_comment(yylval, yylloc); + if (token) + return token; + + continue; + } + + if (current() != '/') + return syntax_error(yylval); + + /* fall-through */ + case '#': + while (next(yylloc, true)) { + if (current() == '\n') + break; + } + + next(yylloc, true); + consume(false); + continue; + + case '"': + return parse_string(yylval, yylloc); + + case '0' ... '9': + return parse_number(yylval, yylloc); + + case 'a' ... 'z': + return parse_keyword(yylval, yylloc); + + default: + return syntax_error(yylval); + } + } + + if (ferror(file)) + return io_error(yylval); + + return 0; +} + +} diff --git a/mmss/lex.hpp b/mmss/lex.hpp new file mode 100644 index 0000000..19bbd4d --- /dev/null +++ b/mmss/lex.hpp @@ -0,0 +1,81 @@ +/* + Copyright (c) 2012-2014, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#pragma once + +#include "types.hpp" +#include + +#include +#include + + +namespace MMSS { + +class lex_t : public nocopy_t { +private: + std::FILE *file; + + bool needspace; + + size_t start; + size_t end; + size_t tok_len; + char buffer[1024]; + + + bool advance(); + bool next(YYLTYPE *yylloc, bool move); + void consume(bool needspace); + + int io_error(YYSTYPE *yylval); + int syntax_error(YYSTYPE *yylval); + int consume_comment(YYSTYPE *yylval, YYLTYPE *yylloc); + int unterminated_string(YYSTYPE *yylval); + + int parse_string(YYSTYPE *yylval, YYLTYPE *yylloc); + int parse_address(YYSTYPE *yylval, YYLTYPE *yylloc); + int parse_float(YYSTYPE *yylval, YYLTYPE *yylloc); + int parse_number(YYSTYPE *yylval, YYLTYPE *yylloc); + int parse_keyword(YYSTYPE *yylval, YYLTYPE *yylloc); + + + char current() { + return buffer[start + tok_len]; + } + + char* get_token() { + return strndup(buffer+start, tok_len); + } + + +public: + lex_t(FILE *file); + + int lex(YYSTYPE *yylval, YYLTYPE *yylloc); +}; + +} -- cgit v1.2.3