/*
 * Tokenizer.cpp
 *
 * Copyright (C) 2008 Matthias Schiffer
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "Tokenizer.h"

#include <string>
#include <vector>

namespace Mad {
namespace Core {

// Characters that structure the input: space, tab, newline, double quote,
// single quote and backslash.
const std::string Tokenizer::delimiters = " \t\n\"'\\";

/*
 * Cuts str into alternating runs of non-delimiter and delimiter characters.
 *
 * Elements at even indices are maximal runs of non-delimiter characters (only
 * the very first one may be empty, when str starts with a delimiter);
 * elements at odd indices are maximal runs of delimiter characters.
 * Concatenating all elements yields str again. An empty str yields an empty
 * vector.
 */
std::vector<std::string> Tokenizer::split(const std::string &str) {
  std::vector<std::string> ret;

  for(std::string::size_type s = 0; s < str.length();) {
    // Non-delimiter run starting at s.
    const std::string::size_type index = str.find_first_of(delimiters, s);

    if(index == std::string::npos) {
      // No further delimiter: the rest of the string is one last word.
      ret.push_back(str.substr(s));
      break;
    }

    ret.push_back(str.substr(s, index - s));

    // Delimiter run starting at index.
    const std::string::size_type index2 = str.find_first_not_of(delimiters, index);

    if(index2 == std::string::npos) {
      // The string ends inside a delimiter run.
      ret.push_back(str.substr(index));
      break;
    }

    ret.push_back(str.substr(index, index2 - index));
    s = index2;
  }

  return ret;
}

/*
 * Splits str into tokens and stores them in out (previous contents of out
 * are discarded).
 *
 * Rules implemented below:
 *  - Outside of quotes, space, tab and newline terminate the current token.
 *    A token is emitted if it is non-empty or if a quote was opened while it
 *    was being built, so "" and '' produce an empty token.
 *  - Inside single quotes every character up to the next single quote is
 *    taken literally, including backslashes and double quotes.
 *  - Inside double quotes whitespace and single quotes are literal, while a
 *    backslash still escapes the following character.
 *  - Outside of single quotes a backslash makes the following character
 *    literal. A backslash followed by a newline is dropped entirely inside
 *    double quotes; outside of them the newline acts as ordinary whitespace.
 *
 * Returns true if the input is complete, false if it ends inside a quoted
 * section or directly after a backslash. The tokens found so far (including
 * the unfinished one) are stored in out in either case.
 */
bool Tokenizer::tokenize(const std::string &str, std::vector<std::string> &out) {
  // split() has to run before out is cleared so that the call stays valid
  // when str refers to an element of out.
  const std::vector<std::string> splitString = split(str);

  bool singleQuotes = false, doubleQuotes = false, escape = false;

  std::string token;
  bool forceToken = false; // a quote was seen, emit the token even if empty

  out.clear();

  // splitString alternates between words (even indices) and delimiter runs
  // (odd indices), so it is consumed in pairs.
  for(std::vector<std::string>::size_type i = 0; i < splitString.size(); i += 2) {
    // Words never contain special characters and are copied verbatim. A
    // pending escape would only affect the first character of the word,
    // which is literal anyway, so it is simply dropped.
    token += splitString[i];
    escape = false;

    if(i + 1 == splitString.size())
      break;

    const std::string &run = splitString[i + 1];

    for(std::string::const_iterator c = run.begin(); c != run.end(); ++c) {
      if(*c == '\n' && escape) {
        escape = false;

        // Escaped newline inside double quotes: line continuation.
        if(doubleQuotes)
          continue;
      }

      // Escaped characters and everything inside single quotes (except the
      // closing quote) are taken literally.
      if(escape || (singleQuotes && *c != '\'')) {
        token += *c;
        escape = false;
        continue;
      }

      switch(*c) {
        case ' ':
        case '\t':
        case '\n':
          if(doubleQuotes) {
            token += *c;
          }
          else if(!token.empty() || forceToken) {
            out.push_back(token);
            token.clear();
            forceToken = false;
          }
          break;

        case '"':
          doubleQuotes = !doubleQuotes;
          forceToken = true;
          break;

        case '\'':
          if(doubleQuotes) {
            token += *c;
          }
          else {
            singleQuotes = !singleQuotes;
            forceToken = true;
          }
          break;

        case '\\':
          escape = true;
          break;
      }
    }
  }

  if(!token.empty() || forceToken)
    out.push_back(token);

  return !(singleQuotes || doubleQuotes || escape);
}

}
}