From e1d8490f0654a3da0b900407d80d91d8d0da68c8 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 27 Sep 2009 01:55:44 +0200 Subject: Use libicu to support unicode properly; migrated ConfigManager to UnicodeString --- src/Core/Application.cpp | 9 ++++++- src/Core/CMakeLists.txt | 3 ++- src/Core/ConfigEntry.cpp | 8 +++--- src/Core/ConfigEntry.h | 32 +++++++++++------------ src/Core/ConfigManager.cpp | 33 ++++++++++++------------ src/Core/LogManager.cpp | 37 +++++++++++++++------------ src/Core/LogManager.h | 3 ++- src/Core/Tokenizer.cpp | 50 +++++++++++++++++++----------------- src/Core/Tokenizer.h | 8 +++--- src/Core/UnicodeString.cpp | 44 +++++++++++++++++++++++++++++++ src/Core/UnicodeString.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 207 insertions(+), 84 deletions(-) create mode 100644 src/Core/UnicodeString.cpp create mode 100644 src/Core/UnicodeString.h (limited to 'src/Core') diff --git a/src/Core/Application.cpp b/src/Core/Application.cpp index d75cbaf..666f2cb 100644 --- a/src/Core/Application.cpp +++ b/src/Core/Application.cpp @@ -22,6 +22,7 @@ #include "LogManager.h" #include "ThreadManager.h" +#include #include #ifndef va_copy @@ -31,7 +32,13 @@ namespace Mad { namespace Core { -Application::Application() : configManager(new ConfigManager(this)), logManager(new LogManager(this)), threadManager(new ThreadManager(this)) {} +Application::Application() { + std::setlocale(LC_ALL, ""); + + configManager = new ConfigManager(this); + logManager = new LogManager(this); + threadManager = new ThreadManager(this); +} Application::~Application() { delete threadManager; diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt index 50e62c8..9828f61 100644 --- a/src/Core/CMakeLists.txt +++ b/src/Core/CMakeLists.txt @@ -21,5 +21,6 @@ mad_library(Core Signals.h ThreadManager.cpp ThreadManager.h Tokenizer.cpp Tokenizer.h + UnicodeString.cpp UnicodeString.h ) -target_link_libraries(Core ${Boost_LIBRARIES}) +target_link_libraries(Core ${Boost_LIBRARIES} ${ICU_LIBRARIES}) diff --git a/src/Core/ConfigEntry.cpp b/src/Core/ConfigEntry.cpp index d260fa1..b587043 100644 --- a/src/Core/ConfigEntry.cpp +++ b/src/Core/ConfigEntry.cpp @@ -22,7 +22,7 @@ namespace Mad { namespace Core { -ConfigEntry::String& ConfigEntry::Entry::operator[] (size_t i) { +ConfigEntry::String& ConfigEntry::Entry::operator[] (std::size_t i) { try { return value.at(i); } @@ -32,7 +32,7 @@ ConfigEntry::String& ConfigEntry::Entry::operator[] (size_t i) { } } -const ConfigEntry::String& ConfigEntry::Entry::operator[] (size_t i) const { +const ConfigEntry::String& ConfigEntry::Entry::operator[] (std::size_t i) const { try { return value.at(i); } @@ -41,7 +41,7 @@ const ConfigEntry::String& ConfigEntry::Entry::operator[] (size_t i) const { } } -ConfigEntry::Entry& ConfigEntry::operator[] (size_t i) { +ConfigEntry::Entry& ConfigEntry::operator[] (std::size_t i) { try { return entries.at(i); } @@ -51,7 +51,7 @@ ConfigEntry::Entry& ConfigEntry::operator[] (size_t i) { } } -const ConfigEntry::Entry& ConfigEntry::operator[] (size_t i) const { +const ConfigEntry::Entry& ConfigEntry::operator[] (std::size_t i) const { try { return entries.at(i); } diff --git a/src/Core/ConfigEntry.h b/src/Core/ConfigEntry.h index fcd8dd4..d4cacc9 100644 --- a/src/Core/ConfigEntry.h +++ b/src/Core/ConfigEntry.h @@ -22,25 +22,23 @@ #include "export.h" +#include "UnicodeString.h" + #include -#include -#include #include -#include - namespace Mad { namespace Core { class MAD_CORE_EXPORT ConfigEntry { public: - class String : public std::string { + class String : public UnicodeString { public: String() {} - String(const std::string &str) : std::string(str) {} + String(const UnicodeString &str) : UnicodeString(str) {} - bool matches(const std::string &str) const { - return (boost::algorithm::to_lower_copy(static_cast(*this)) == boost::algorithm::to_lower_copy(str)); + bool matches(const UnicodeString &str) const { + return (str.caseCompare(*this, 0) == 0); } }; @@ -53,7 +51,7 @@ class MAD_CORE_EXPORT ConfigEntry { public: Entry() {} - Entry(const std::vector &args) { + Entry(const std::vector &args) { if(args.empty()) return; @@ -62,17 +60,17 @@ class MAD_CORE_EXPORT ConfigEntry { value.assign(args.begin()+1, args.end()); } - bool empty() const { - return key.empty(); + bool isEmpty() const { + return key.isEmpty(); } String &getKey() {return key;} const String &getKey() const {return key;} - size_t getSize() const {return value.size();} + std::size_t getSize() const {return value.size();} - String& operator[] (size_t i); - const String& operator[] (size_t i) const; + String& operator[] (std::size_t i); + const String& operator[] (std::size_t i) const; }; private: @@ -80,10 +78,10 @@ class MAD_CORE_EXPORT ConfigEntry { Entry zero, constZero; public: - size_t getSize() const {return entries.size();} + std::size_t getSize() const {return entries.size();} - Entry& operator[] (size_t i); - const Entry& operator[] (size_t i) const; + Entry& operator[] (std::size_t i); + const Entry& operator[] (std::size_t i) const; void push(const Entry &entry) { entries.push_back(entry); diff --git a/src/Core/ConfigManager.cpp b/src/Core/ConfigManager.cpp index bd20cc5..51d8647 100644 --- a/src/Core/ConfigManager.cpp +++ b/src/Core/ConfigManager.cpp @@ -46,7 +46,7 @@ void ConfigManager::handleConfigEntry(const ConfigEntry &entry) { } if(!handled) - application->logf(Logger::LOG_WARNING, "Invalid config option '%s'.", entry[entry.getSize()-1].getKey().c_str()); + application->logf(Logger::LOG_WARNING, "Invalid config option '%s'.", entry[entry.getSize()-1].getKey().extract().c_str()); } bool ConfigManager::loadFile(const std::string &filename) { @@ -55,30 +55,33 @@ bool ConfigManager::loadFile(const std::string &filename) { std::ifstream file(filename.c_str()); ConfigEntry entry; - std::string line, input; - char delim; - std::vector splitLine, lastConfigLine; + UnicodeString line, input; + UChar delim; + std::vector splitLine, lastConfigLine; if(!file.good()) return false; - while(!(file.eof() && line.empty() && input.empty())) { - while(input.empty() && !file.eof()) - std::getline(file, input); + while(!(file.eof() && line.isEmpty() && input.isEmpty())) { + while(input.isEmpty() && !file.eof()) { + std::string tmp; + std::getline(file, tmp); + input = tmp.c_str(); + } - if(input.empty()) + if(input.isEmpty()) break; - size_t pos = input.find_first_of("#{}"); - if(pos == std::string::npos) { + boost::int32_t pos = input.findFirstOf("#{}"); + if(pos < 0) { line += input; delim = '\n'; - input.clear(); + input.remove(); } else { line += input.substr(0, pos); delim = input[pos]; - input = input.substr(pos+1); + input.remove(0, pos+1); } if(!Tokenizer::tokenize(line, splitLine)) { @@ -87,8 +90,6 @@ bool ConfigManager::loadFile(const std::string &filename) { } if(!splitLine.empty()) { - pos = line.find_first_of(" \t"); - entry.push(splitLine); handleConfigEntry(entry); entry.pop(); @@ -98,7 +99,7 @@ bool ConfigManager::loadFile(const std::string &filename) { switch(delim) { case '#': - input.clear(); + input.remove(); break; case '{': entry.push(lastConfigLine); @@ -107,7 +108,7 @@ bool ConfigManager::loadFile(const std::string &filename) { entry.pop(); } - line.clear(); + line.remove(); } // TODO Depth check diff --git a/src/Core/LogManager.cpp b/src/Core/LogManager.cpp index af3e0ba..4a4cc0d 100644 --- a/src/Core/LogManager.cpp +++ b/src/Core/LogManager.cpp @@ -42,23 +42,28 @@ void LogManager::ConsoleLogger::logMessageDirect(MessageCategory /*category*/, M } -LogManager::MessageLevel LogManager::parseLevel(const std::string &str) throw (Exception) { - if(str.empty()) +LogManager::MessageLevel LogManager::parseLevel(const UnicodeString &str) throw (Exception) { + static const UnicodeString DEBUG_LEVEL("debug"); + static const UnicodeString VERBOSE_LEVEL("verbose"); + static const UnicodeString DEFAULT_LEVEL("default"); + static const UnicodeString WARNING_LEVEL("warning"); + static const UnicodeString ERROR_LEVEL("error"); + static const UnicodeString CRITICAL_LEVEL("critical"); + + if(str.isEmpty()) return Logger::LOG_DEFAULT; - std::string lowerStr = boost::algorithm::to_lower_copy(str); - - if(lowerStr == "debug") + if(str.caseCompare(DEBUG_LEVEL, 0) == 0) return Logger::LOG_DEBUG; - else if(lowerStr == "verbose") + else if(str.caseCompare(VERBOSE_LEVEL, 0) == 0) return Logger::LOG_VERBOSE; - else if(lowerStr == "default") + else if(str.caseCompare(DEFAULT_LEVEL, 0) == 0) return Logger::LOG_DEFAULT; - else if(lowerStr == "warning") + else if(str.caseCompare(WARNING_LEVEL, 0) == 0) return Logger::LOG_WARNING; - else if(lowerStr == "error") + else if(str.caseCompare(ERROR_LEVEL, 0) == 0) return Logger::LOG_ERROR; - else if(lowerStr == "critical") + else if(str.caseCompare(CRITICAL_LEVEL, 0) == 0) return Logger::LOG_CRITICAL; else throw Exception(Exception::INVALID_INPUT); @@ -77,29 +82,29 @@ LogManager::~LogManager() { bool LogManager::handleConfigEntry(const ConfigEntry &entry, bool handled) { if(entry[0].getKey().matches("Log")) { if(entry[0][0].matches("Console")) { - if(entry[1].empty()) { + if(entry[1].isEmpty()) { registerLogger(consoleLogger); return true; } else if(entry[1].getKey().matches("Level")) { - if(entry[2].empty()) { + if(entry[2].isEmpty()) { try { - if(boost::algorithm::to_lower_copy(static_cast(entry[1][0])) == "remote") + if(entry[1][0].matches("remote")) consoleLogger->setRemoteLevel(parseLevel(entry[1][1])); else consoleLogger->setLevel(parseLevel(entry[1][0])); } catch(Core::Exception e) { - application->logf(Logger::LOG_WARNING, "Unknown log level '%s'.", entry[1][0].c_str()); + application->logf(Logger::LOG_WARNING, "Unknown log level '%s'.", entry[1][0].extract().c_str()); } return true; } } } - else if(entry[1].empty()) { + else if(entry[1].isEmpty()) { if(!handled) { - application->logf(Logger::LOG_WARNING, "Unknown logger '%s'.", entry[0][0].c_str()); + application->logf(Logger::LOG_WARNING, "Unknown logger '%s'.", entry[0][0].extract().c_str()); return true; } } diff --git a/src/Core/LogManager.h b/src/Core/LogManager.h index c569238..9b47a7e 100644 --- a/src/Core/LogManager.h +++ b/src/Core/LogManager.h @@ -25,6 +25,7 @@ #include "Configurable.h" #include "Exception.h" #include "Logger.h" +#include "UnicodeString.h" #include #include @@ -99,7 +100,7 @@ class MAD_CORE_EXPORT LogManager : public Configurable { virtual void configFinished(); public: - static MessageLevel parseLevel(const std::string &str) throw (Exception); + static MessageLevel parseLevel(const UnicodeString &str) throw (Exception); void log(MessageCategory category, MessageLevel level, boost::posix_time::ptime timestamp, const std::string &message, const std::string &source = std::string()); diff --git a/src/Core/Tokenizer.cpp b/src/Core/Tokenizer.cpp index 4043b39..93c853a 100644 --- a/src/Core/Tokenizer.cpp +++ b/src/Core/Tokenizer.cpp @@ -18,30 +18,32 @@ */ #include "Tokenizer.h" +#include +#include namespace Mad { namespace Core { -const std::string Tokenizer::delimiters = " \t\n\"'\\"; +const UnicodeString Tokenizer::delimiters(" \t\n\"'\\"); -std::vector Tokenizer::split(const std::string &str) { - std::vector ret; +std::vector Tokenizer::split(const UnicodeString &str) { + std::vector ret; - for(size_t s = 0; s < str.length();) { - size_t index = str.find_first_of(delimiters, s); - size_t length = (index == std::string::npos) ? std::string::npos : index-s; + for(boost::int32_t s = 0; s < str.length();) { + boost::int32_t index = str.findFirstOf(delimiters, s); + boost::int32_t length = (index < 0) ? -1 : index-s; ret.push_back(str.substr(s, length)); - if(index != std::string::npos) { - size_t index2 = str.find_first_not_of(delimiters, index); + if(index >= 0) { + boost::int32_t index2 = str.findFirstNotOf(delimiters, index); - length = (index2 == std::string::npos) ? std::string::npos : index2-index; + length = (index2 < 0) ? -1 : index2-index; ret.push_back(str.substr(index, length)); - if(index2 != std::string::npos) + if(index2 >= 0) s = index2; else break; @@ -53,48 +55,48 @@ std::vector Tokenizer::split(const std::string &str) { return ret; } -bool Tokenizer::tokenize(const std::string &str, std::vector &out) { - std::vector splitString = split(str); +bool Tokenizer::tokenize(const UnicodeString &str, std::vector &out) { + std::vector splitString = split(str); bool singleQuotes = false, doubleQuotes = false, escape = false; - std::string token; + UnicodeString token; bool forceToken = false; out.clear(); - for(std::vector::iterator s = splitString.begin(); s != splitString.end(); ++s) { + for(std::vector::iterator s = splitString.begin(); s != splitString.end(); ++s) { token += *s; escape = false; if(++s == splitString.end()) break; - for(std::string::iterator c = s->begin(); c != s->end(); ++c) { - if(*c == '\n' && escape) { + for(icu::StringCharacterIterator c(*s); c.hasNext(); c.next()) { + if(c.current() == '\n' && escape) { escape = false; if(doubleQuotes) continue; } - if(escape || (singleQuotes && *c != '\'')) { - token += *c; + if(escape || (singleQuotes && c.current() != '\'')) { + token += c.current(); escape = false; continue; } - switch(*c) { + switch(c.current()) { case ' ': case '\t': case '\n': if(doubleQuotes) { - token += *c; + token += c.current(); } else { - if(!token.empty() || forceToken) { + if(!token.isEmpty() || forceToken) { out.push_back(token); - token.clear(); + token.remove(); forceToken = false; } } @@ -107,7 +109,7 @@ bool Tokenizer::tokenize(const std::string &str, std::vector &out) case '\'': if(doubleQuotes) { - token += *c; + token += c.current(); } else { singleQuotes = !singleQuotes; @@ -121,7 +123,7 @@ bool Tokenizer::tokenize(const std::string &str, std::vector &out) } } - if(!token.empty() || forceToken) + if(!token.isEmpty() || forceToken) out.push_back(token); return !(singleQuotes || doubleQuotes || escape); diff --git a/src/Core/Tokenizer.h b/src/Core/Tokenizer.h index 637aa05..f65d44c 100644 --- a/src/Core/Tokenizer.h +++ b/src/Core/Tokenizer.h @@ -22,7 +22,7 @@ #include "export.h" -#include +#include "UnicodeString.h" #include namespace Mad { @@ -30,14 +30,14 @@ namespace Core { class MAD_CORE_EXPORT Tokenizer { private: - static const std::string delimiters; + static const UnicodeString delimiters; Tokenizer(); - static std::vector split(const std::string &str); + static std::vector split(const UnicodeString &str); public: - static bool tokenize(const std::string &str, std::vector &out); + static bool tokenize(const UnicodeString &str, std::vector &out); }; } diff --git a/src/Core/UnicodeString.cpp b/src/Core/UnicodeString.cpp new file mode 100644 index 0000000..ccace47 --- /dev/null +++ b/src/Core/UnicodeString.cpp @@ -0,0 +1,44 @@ +/* + * UnicodeString.cpp + * + * Copyright (C) 2009 Matthias Schiffer + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with this program. If not, see . + */ + +#include "UnicodeString.h" + +namespace Mad { +namespace Core { + +boost::int32_t UnicodeString::findFirstOf(const UnicodeString &chars, boost::int32_t start) const { + for(boost::int32_t i = start; i < length(); ++i) { + if(chars.indexOf(charAt(i)) >= 0) + return i; + } + + return -1; +} + +boost::int32_t UnicodeString::findFirstNotOf(const UnicodeString &chars, boost::int32_t start) const { + for(boost::int32_t i = start; i < length(); ++i) { + if(chars.indexOf(charAt(i)) < 0) + return i; + } + + return -1; +} + +} +} diff --git a/src/Core/UnicodeString.h b/src/Core/UnicodeString.h new file mode 100644 index 0000000..aed53b6 --- /dev/null +++ b/src/Core/UnicodeString.h @@ -0,0 +1,64 @@ +/* + * UnicodeString.h + * + * Copyright (C) 2009 Matthias Schiffer + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with this program. If not, see . + */ + +#ifndef MAD_CORE_UNICODESTRING_H_ +#define MAD_CORE_UNICODESTRING_H_ + +#include "export.h" + +#include +#include +#include + +#define U_USING_ICU_NAMESPACE 0 +#include + +namespace Mad { +namespace Core { + +class MAD_CORE_EXPORT UnicodeString : public icu::UnicodeString { + public: + UnicodeString() {} + UnicodeString(const icu::UnicodeString &str) : icu::UnicodeString(str) {} + UnicodeString(const char *str) : icu::UnicodeString(str) {} + + UnicodeString substr(boost::int32_t start, boost::int32_t length = -1) const { + if(length < 0) + return icu::UnicodeString(*this, start); + else + return icu::UnicodeString(*this, start, length); + } + + boost::int32_t findFirstOf(const UnicodeString &chars, boost::int32_t start = 0) const; + boost::int32_t findFirstNotOf(const UnicodeString &chars, boost::int32_t start = 0) const; + + std::string extract() const { + boost::uint32_t len = (boost::uint32_t)icu::UnicodeString::extract(0, length(), (char*)0, 0u); + + boost::scoped_array buf(new char[len]); + icu::UnicodeString::extract(0, length(), buf.get(), len); + + return std::string(buf.get(), len); + } +}; + +} +} + +#endif /* MAD_CORE_UNICODESTRING_H_ */ -- cgit v1.2.3