From 1226366351300139f2d8b7c10fee29d744591426 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 22 Oct 2008 20:56:56 +0200 Subject: Alte Split-Funktion durch neuen Tokenizer ersetzt --- src/Common/Tokenizer.cpp | 125 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/Common/Tokenizer.cpp (limited to 'src/Common/Tokenizer.cpp') diff --git a/src/Common/Tokenizer.cpp b/src/Common/Tokenizer.cpp new file mode 100644 index 0000000..1d7462f --- /dev/null +++ b/src/Common/Tokenizer.cpp @@ -0,0 +1,125 @@ +/* + * Tokenizer.cpp + * + * Copyright (C) 2008 Matthias Schiffer + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ + +#include "Tokenizer.h" + +#include + +namespace Mad { +namespace Common { + +const std::string Tokenizer::delimiters = " \t\n\"'\\"; + + +std::vector Tokenizer::split(const std::string &str) { + std::vector ret; + + for(size_t s = 0; s < str.length();) { + size_t index = str.find_first_of(delimiters, s); + size_t length = (index == std::string::npos) ? std::string::npos : index-s; + + ret.push_back(str.substr(s, length)); + + if(index != std::string::npos) { + size_t index2 = str.find_first_not_of(delimiters, index); + + length = (index2 == std::string::npos) ? std::string::npos : index2-index; + + ret.push_back(str.substr(index, length)); + + if(index2 != std::string::npos) + s = index2; + else + break; + } + else + break; + } + + return ret; +} + +bool Tokenizer::tokenize(const std::string &str, std::vector &out) { + std::vector splitString = split(str); + + bool singleQuotes = false, doubleQuotes = false, escape = false; + std::string token; + bool forceToken = false; + + out.clear(); + + for(std::vector::iterator s = splitString.begin(); s != splitString.end(); ++s) { + token += *s; + escape = false; + + if(++s == splitString.end()) + break; + + for(std::string::iterator c = s->begin(); c != s->end(); ++c) { + if(escape || (singleQuotes && *c != '\'')) { + token += *c; + escape = false; + continue; + } + + switch(*c) { + case ' ': + case '\t': + case '\n': + if(doubleQuotes) { + token += *c; + } + else { + if(!token.empty() || forceToken) { + out.push_back(token); + token.clear(); + forceToken = false; + } + } + break; + + case '"': + doubleQuotes = !doubleQuotes; + forceToken = true; + break; + + case '\'': + if(doubleQuotes) { + token += *c; + } + else { + singleQuotes = !singleQuotes; + forceToken = true; + } + break; + + case '\\': + escape = true; + } + } + } + + if(!token.empty() || forceToken) + out.push_back(token); + + return !(singleQuotes || doubleQuotes || escape); +} + +} +} -- cgit v1.2.3