diff options
author | Matthias Schiffer <matthias@gamezock.de> | 2009-06-09 19:01:02 +0200 |
---|---|---|
committer | Matthias Schiffer <matthias@gamezock.de> | 2009-06-09 19:01:02 +0200 |
commit | 766c56a693e8b1bd4293459bb256abdc0515a0b5 (patch) | |
tree | da8e51003cf801344e22b0b2b2e28a48d6a8b74c /src/Core/Tokenizer.cpp | |
parent | 452320b5ec31447a526735016fa07589cb848032 (diff) | |
download | mad-766c56a693e8b1bd4293459bb256abdc0515a0b5.tar mad-766c56a693e8b1bd4293459bb256abdc0515a0b5.zip |
Teile der Namespaces Common und Net in den neuen Namespace Core verschoben
Diffstat (limited to 'src/Core/Tokenizer.cpp')
-rw-r--r-- | src/Core/Tokenizer.cpp | 131 |
1 files changed, 131 insertions, 0 deletions
diff --git a/src/Core/Tokenizer.cpp b/src/Core/Tokenizer.cpp new file mode 100644 index 0000000..a812ed1 --- /dev/null +++ b/src/Core/Tokenizer.cpp @@ -0,0 +1,131 @@ +/* + * Tokenizer.cpp + * + * Copyright (C) 2008 Matthias Schiffer <matthias@gamezock.de> + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "Tokenizer.h" + +namespace Mad { +namespace Core { + +const std::string Tokenizer::delimiters = " \t\n\"'\\"; + + +std::vector<std::string> Tokenizer::split(const std::string &str) { + std::vector<std::string> ret; + + for(size_t s = 0; s < str.length();) { + size_t index = str.find_first_of(delimiters, s); + size_t length = (index == std::string::npos) ? std::string::npos : index-s; + + ret.push_back(str.substr(s, length)); + + if(index != std::string::npos) { + size_t index2 = str.find_first_not_of(delimiters, index); + + length = (index2 == std::string::npos) ? std::string::npos : index2-index; + + ret.push_back(str.substr(index, length)); + + if(index2 != std::string::npos) + s = index2; + else + break; + } + else + break; + } + + return ret; +} + +bool Tokenizer::tokenize(const std::string &str, std::vector<std::string> &out) { + std::vector<std::string> splitString = split(str); + + bool singleQuotes = false, doubleQuotes = false, escape = false; + std::string token; + bool forceToken = false; + + out.clear(); + + for(std::vector<std::string>::iterator s = splitString.begin(); s != splitString.end(); ++s) { + token += *s; + escape = false; + + if(++s == splitString.end()) + break; + + for(std::string::iterator c = s->begin(); c != s->end(); ++c) { + if(*c == '\n' && escape) { + escape = false; + + if(doubleQuotes) + continue; + } + + if(escape || (singleQuotes && *c != '\'')) { + token += *c; + + escape = false; + continue; + } + + switch(*c) { + case ' ': + case '\t': + case '\n': + if(doubleQuotes) { + token += *c; + } + else { + if(!token.empty() || forceToken) { + out.push_back(token); + token.clear(); + forceToken = false; + } + } + break; + + case '"': + doubleQuotes = !doubleQuotes; + forceToken = true; + break; + + case '\'': + if(doubleQuotes) { + token += *c; + } + else { + singleQuotes = !singleQuotes; + forceToken = true; + } + break; + + case '\\': + escape = true; + } + } + } + + if(!token.empty() || forceToken) + out.push_back(token); + + return !(singleQuotes || doubleQuotes || escape); +} + +} +} |