From 1226366351300139f2d8b7c10fee29d744591426 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <matthias@gamezock.de>
Date: Wed, 22 Oct 2008 20:56:56 +0200
Subject: Alte Split-Funktion durch neuen Tokenizer ersetzt

---
 src/Common/Tokenizer.cpp | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 src/Common/Tokenizer.cpp

(limited to 'src/Common/Tokenizer.cpp')
diff --git a/src/Common/Tokenizer.cpp b/src/Common/Tokenizer.cpp
new file mode 100644
index 0000000..1d7462f
--- /dev/null
+++ b/src/Common/Tokenizer.cpp
@@ -0,0 +1,125 @@
+/*
+ * Tokenizer.cpp
+ *
+ * Copyright (C) 2008 Matthias Schiffer <matthias@gamezock.de>
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "Tokenizer.h"
+
+#include <iostream>
+
+namespace Mad {
+namespace Common {
+
+// Characters that end a plain-text run in split(): space, tab, newline, both quote kinds, backslash.
+const std::string Tokenizer::delimiters(" \t\n\"'\\");
+
+/* Cuts str into alternating pieces: even entries of the result are runs of
+ * non-delimiter characters (the first may be empty), odd entries are the runs
+ * of delimiter characters between them. An empty str gives an empty result. */
+std::vector<std::string> Tokenizer::split(const std::string &str) {
+  std::vector<std::string> pieces;
+  size_t pos = 0;
+  while(pos < str.length()) {
+    // Plain text up to the next delimiter, or up to the end of the string.
+    const size_t delimStart = str.find_first_of(delimiters, pos);
+    if(delimStart == std::string::npos) {
+      pieces.push_back(str.substr(pos));
+      break;
+    }
+    pieces.push_back(str.substr(pos, delimStart - pos));
+
+    // The delimiter run itself, up to the next non-delimiter character.
+    const size_t delimEnd = str.find_first_not_of(delimiters, delimStart);
+    if(delimEnd == std::string::npos) {
+      pieces.push_back(str.substr(delimStart));
+      break;
+    }
+    pieces.push_back(str.substr(delimStart, delimEnd - delimStart));
+    pos = delimEnd;
+  }
+
+  return pieces;
+}
+
+/* Splits str into shell-like words, stored in out (which is cleared first).
+ * Unquoted spaces, tabs and newlines separate words. Single quotes make
+ * everything up to the closing quote literal; double quotes make whitespace
+ * and single quotes literal. Outside single quotes a backslash is removed and
+ * the character after it is taken literally. A quoted word is kept even when
+ * empty. Returns false if str ends inside quotes or right after a backslash. */
+bool Tokenizer::tokenize(const std::string &str, std::vector<std::string> &out) {
+  // Even entries are plain text, odd entries are runs of delimiter characters.
+  const std::vector<std::string> pieces = split(str);
+  bool inSingle = false, inDouble = false, escaped = false;
+  bool wordPending = false; // set by quotes so that '' or "" yields an empty word
+  std::string word;
+
+  out.clear();
+
+  for(size_t i = 0; i < pieces.size(); i += 2) {
+    // Plain text always joins the current word; a backslash left pending by
+    // the previous delimiter run is dropped here.
+    word += pieces[i];
+    escaped = false;
+
+    if(i+1 == pieces.size())
+      break;
+
+    const std::string &run = pieces[i+1];
+    for(size_t j = 0; j < run.length(); ++j) {
+      const char ch = run[j];
+
+      if(escaped || (inSingle && ch != '\'')) {
+        // Escaped, or protected by single quotes: taken literally.
+        word += ch;
+        escaped = false;
+      }
+      else if(ch == '\\')
+        escaped = true;
+      else if(ch == '"') {
+        inDouble = !inDouble;
+        wordPending = true;
+      }
+      else if(ch == '\'') {
+        if(inDouble)
+          word += ch;
+        else {
+          inSingle = !inSingle;
+          wordPending = true;
+        }
+      }
+      else if(ch == ' ' || ch == '\t' || ch == '\n') {
+        if(inDouble)
+          word += ch;
+        else if(!word.empty() || wordPending) {
+          // Unquoted whitespace ends the current word.
+          out.push_back(word);
+          word.clear();
+          wordPending = false;
+        }
+      }
+    }
+  }
+
+  if(!word.empty() || wordPending)
+    out.push_back(word);
+
+  return !(inSingle || inDouble || escaped);
+}
+
+}
+}
-- 
cgit v1.2.3