1 files changed, 26 insertions, 24 deletions
diff --git a/src/Core/Tokenizer.cpp b/src/Core/Tokenizer.cpp
index 4043b39..93c853a 100644
--- a/src/Core/Tokenizer.cpp
+++ b/src/Core/Tokenizer.cpp
@@ -18,30 +18,32 @@
  */
 
 #include "Tokenizer.h"
+#include <boost/cstdint.hpp>
+#include <unicode/schriter.h>
 
 namespace Mad {
 namespace Core {
 
-const std::string Tokenizer::delimiters = " \t\n\"'\\";
+const UnicodeString Tokenizer::delimiters(" \t\n\"'\\");
 
 
-std::vector<std::string> Tokenizer::split(const std::string &str) {
-  std::vector<std::string> ret;
+std::vector<UnicodeString> Tokenizer::split(const UnicodeString &str) {
+  std::vector<UnicodeString> ret;
 
-  for(size_t s = 0; s < str.length();) {
-    size_t index = str.find_first_of(delimiters, s);
-    size_t length = (index == std::string::npos) ? std::string::npos : index-s;
+  for(boost::int32_t s = 0; s < str.length();) {
+    boost::int32_t index = str.findFirstOf(delimiters, s);
+    boost::int32_t length = (index < 0) ? -1 : index-s;
 
     ret.push_back(str.substr(s, length));
 
-    if(index != std::string::npos) {
-      size_t index2 = str.find_first_not_of(delimiters, index);
+    if(index >= 0) {
+      boost::int32_t index2 = str.findFirstNotOf(delimiters, index);
 
-      length = (index2 == std::string::npos) ? std::string::npos : index2-index;
+      length = (index2 < 0) ? -1 : index2-index;
 
       ret.push_back(str.substr(index, length));
 
-      if(index2 != std::string::npos)
+      if(index2 >= 0)
         s = index2;
       else
         break;
@@ -53,48 +55,48 @@ std::vector<std::string> Tokenizer::split(const std::string &str) {
   return ret;
 }
 
-bool Tokenizer::tokenize(const std::string &str, std::vector<std::string> &out) {
-  std::vector<std::string> splitString = split(str);
+bool Tokenizer::tokenize(const UnicodeString &str, std::vector<UnicodeString> &out) {
+  std::vector<UnicodeString> splitString = split(str);
 
   bool singleQuotes = false, doubleQuotes = false, escape = false;
-  std::string token;
+  UnicodeString token;
   bool forceToken = false;
 
   out.clear();
 
-  for(std::vector<std::string>::iterator s = splitString.begin(); s != splitString.end(); ++s) {
+  for(std::vector<UnicodeString>::iterator s = splitString.begin(); s != splitString.end(); ++s) {
     token += *s;
     escape = false;
 
     if(++s == splitString.end())
       break;
 
-    for(std::string::iterator c = s->begin(); c != s->end(); ++c) {
-      if(*c == '\n' && escape) {
+    for(icu::StringCharacterIterator c(*s); c.hasNext(); c.next()) {
+      if(c.current() == '\n' && escape) {
         escape = false;
 
         if(doubleQuotes)
           continue;
       }
 
-      if(escape || (singleQuotes && *c != '\'')) {
-        token += *c;
+      if(escape || (singleQuotes && c.current() != '\'')) {
+        token += c.current();
 
         escape = false;
         continue;
       }
 
-      switch(*c) {
+      switch(c.current()) {
         case ' ':
         case '\t':
         case '\n':
           if(doubleQuotes) {
-            token += *c;
+            token += c.current();
           }
           else {
-            if(!token.empty() || forceToken) {
+            if(!token.isEmpty() || forceToken) {
               out.push_back(token);
-              token.clear();
+              token.remove();
               forceToken = false;
             }
           }
@@ -107,7 +109,7 @@ bool Tokenizer::tokenize(const std::string &str, std::vector<std::string> &out)
 
         case '\'':
           if(doubleQuotes) {
-            token += *c;
+            token += c.current();
           }
           else {
             singleQuotes = !singleQuotes;
@@ -121,7 +123,7 @@ bool Tokenizer::tokenize(const std::string &str, std::vector<std::string> &out)
     }
   }
 
-  if(!token.empty() || forceToken)
+  if(!token.isEmpty() || forceToken)
     out.push_back(token);
 
   return !(singleQuotes || doubleQuotes || escape);