Split string encoding functionality to Windows and generic implementations

dev
Tomasz Kapuściński 2023-08-15 18:57:34 +02:00
parent 97ea5cc895
commit 4d11040dba
4 changed files with 376 additions and 191 deletions

View File

@ -5,6 +5,16 @@ target_sources(Colobot-Common PRIVATE
src/common/stringutils.h
)
if(WIN32)
target_sources(Colobot-Common PRIVATE
src/common/stringutils_windows.cpp
)
else()
target_sources(Colobot-Common PRIVATE
src/common/stringutils_generic.cpp
)
endif()
target_include_directories(Colobot-Common PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
)

View File

@ -64,8 +64,6 @@ std::string VFormat(const char *fmt, va_list ap)
}
}
using UTF8Buffer = std::array<char, 8>;
} // anonymous namespace
std::string StrUtils::Format(const char *fmt, ...)
@ -138,192 +136,3 @@ void StrUtils::Trim(std::string& str)
TrimLeft(str);
TrimRight(str);
}
std::string StrUtils::UnicodeCharToUtf8(unsigned int ch)
{
UTF8Buffer buffer;
std::mbstate_t state = {};
int count = wcrtomb(buffer.data(), static_cast<wchar_t>(ch), &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
return std::string(buffer.data(), count);
}
std::string StrUtils::UnicodeStringToUtf8(const std::wstring &str)
{
std::string result;
result.reserve(str.size());
UTF8Buffer buffer;
for (const auto& ch : str)
{
std::mbstate_t state = {};
size_t len = std::wcrtomb(buffer.data(), ch, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
result.append(buffer.data(), len);
}
return result;
}
unsigned int StrUtils::Utf8CharToUnicode(const std::string &ch)
{
if (ch.empty())
return 0;
std::mbstate_t state = {};
wchar_t c = 0;
int len = mbrtowc(&c, ch.data(), ch.size(), &state);
if (len == 0) return L'\0';
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
return c;
}
std::wstring StrUtils::Utf8StringToUnicode(const std::string &str)
{
std::wstring result;
result.reserve(str.size());
for (size_t i = 0; i < str.size();)
{
std::mbstate_t state = {};
wchar_t ch;
int len = std::mbrtowc(&ch, str.data() + i, str.size() - i, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
i += len;
result += ch;
}
return result;
}
int StrUtils::Utf8CharSizeAt(const std::string &str, unsigned int pos)
{
if (pos >= str.size())
throw std::out_of_range("Index is greater than size");
std::mbstate_t state = {};
int len = std::mbrlen(str.data() + pos, str.size() - pos, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
return len;
}
std::size_t StrUtils::Utf8StringLength(const std::string &str)
{
std::size_t result = 0;
for (size_t i = 0; i < str.size();)
{
std::mbstate_t state = {};
size_t count = std::mbrlen(str.data() + i, str.size() - i, &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
else if (count == -2) throw std::invalid_argument("Invalid character");
i += count;
++result;
}
return result;
}
bool StrUtils::isUtf8ContinuationByte(char c)
{
return (c & 0b1100'0000) == 0b1000'0000;
}
std::string StrUtils::ToLower(const std::string& text)
{
std::string result;
result.reserve(text.size());
UTF8Buffer buffer;
for (size_t i = 0; i < text.size();)
{
std::mbstate_t state = {};
wchar_t ch;
int len = std::mbrtowc(&ch, text.data() + i, text.size() - i, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
i += len;
ch = std::towlower(ch);
state = {};
size_t count = std::wcrtomb(buffer.data(), ch, &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
result.append(buffer.data(), count);
}
return result;
}
std::string StrUtils::ToUpper(const std::string& text)
{
std::string result;
result.reserve(text.size());
UTF8Buffer buffer;
for (size_t i = 0; i < text.size();)
{
std::mbstate_t state = {};
wchar_t ch;
size_t len = std::mbrtowc(&ch, text.data() + i, text.size() - i, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
i += len;
ch = std::towupper(ch);
state = {};
size_t count = std::wcrtomb(buffer.data(), ch, &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
result.append(buffer.data(), count);
}
return result;
}

View File

@ -0,0 +1,221 @@
/*
* This file is part of the Colobot: Gold Edition source code
* Copyright (C) 2001-2023, Daniel Roux, EPSITEC SA & TerranovaTeam
* http://epsitec.ch; http://colobot.info; http://github.com/colobot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://gnu.org/licenses
*/
#include "common/stringutils.h"
#include <algorithm>
#include <array>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <cwchar>
#include <cwctype>
#include <stdexcept>
#include <vector>
using UTF8Buffer = std::array<char, 8>;
std::string StrUtils::UnicodeCharToUtf8(unsigned int ch)
{
UTF8Buffer buffer;
std::mbstate_t state = {};
int count = wcrtomb(buffer.data(), static_cast<wchar_t>(ch), &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
return std::string(buffer.data(), count);
}
std::string StrUtils::UnicodeStringToUtf8(const std::wstring &str)
{
std::string result;
result.reserve(str.size());
UTF8Buffer buffer;
for (const auto& ch : str)
{
std::mbstate_t state = {};
size_t len = std::wcrtomb(buffer.data(), ch, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
result.append(buffer.data(), len);
}
return result;
}
unsigned int StrUtils::Utf8CharToUnicode(const std::string &ch)
{
if (ch.empty())
return 0;
std::mbstate_t state = {};
wchar_t c = 0;
int len = mbrtowc(&c, ch.data(), ch.size(), &state);
if (len == 0) return L'\0';
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
return c;
}
std::wstring StrUtils::Utf8StringToUnicode(const std::string &str)
{
std::wstring result;
result.reserve(str.size());
for (size_t i = 0; i < str.size();)
{
std::mbstate_t state = {};
wchar_t ch;
int len = std::mbrtowc(&ch, str.data() + i, str.size() - i, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
i += len;
result += ch;
}
return result;
}
int StrUtils::Utf8CharSizeAt(const std::string &str, unsigned int pos)
{
if (pos >= str.size())
throw std::out_of_range("Index is greater than size");
std::mbstate_t state = {};
int len = std::mbrlen(str.data() + pos, str.size() - pos, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
return len;
}
std::size_t StrUtils::Utf8StringLength(const std::string &str)
{
std::size_t result = 0;
for (size_t i = 0; i < str.size();)
{
std::mbstate_t state = {};
size_t count = std::mbrlen(str.data() + i, str.size() - i, &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
else if (count == -2) throw std::invalid_argument("Invalid character");
i += count;
++result;
}
return result;
}
bool StrUtils::isUtf8ContinuationByte(char c)
{
return (c & 0b1100'0000) == 0b1000'0000;
}
std::string StrUtils::ToLower(const std::string& text)
{
std::string result;
result.reserve(text.size());
UTF8Buffer buffer;
for (size_t i = 0; i < text.size();)
{
std::mbstate_t state = {};
wchar_t ch;
int len = std::mbrtowc(&ch, text.data() + i, text.size() - i, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
i += len;
ch = std::towlower(ch);
state = {};
size_t count = std::wcrtomb(buffer.data(), ch, &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
result.append(buffer.data(), count);
}
return result;
}
std::string StrUtils::ToUpper(const std::string& text)
{
std::string result;
result.reserve(text.size());
UTF8Buffer buffer;
for (size_t i = 0; i < text.size();)
{
std::mbstate_t state = {};
wchar_t ch;
size_t len = std::mbrtowc(&ch, text.data() + i, text.size() - i, &state);
if (len == 0) len = 1;
else if (len == -1) throw std::invalid_argument("Invalid character");
else if (len == -2) throw std::invalid_argument("Invalid character");
i += len;
ch = std::towupper(ch);
state = {};
size_t count = std::wcrtomb(buffer.data(), ch, &state);
if (count == 0) count = 1;
else if (count == -1) throw std::invalid_argument("Invalid character");
result.append(buffer.data(), count);
}
return result;
}

View File

@ -0,0 +1,145 @@
/*
* This file is part of the Colobot: Gold Edition source code
* Copyright (C) 2001-2023, Daniel Roux, EPSITEC SA & TerranovaTeam
* http://epsitec.ch; http://colobot.info; http://github.com/colobot
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://gnu.org/licenses
*/
#include "common/stringutils.h"
#include <algorithm>
#include <array>
#include <stdexcept>
#include <vector>
#include <Windows.h>
#include <stringapiset.h>
std::string StrUtils::UnicodeCharToUtf8(unsigned int ch)
{
wchar_t character = ch;
std::array<char, 8> buffer;
int length = WideCharToMultiByte(CP_UTF8, 0,
&character, 1, buffer.data(), buffer.size(), nullptr, nullptr);
if (length == 0) throw std::invalid_argument("Invalid character");
return std::string(buffer.data(), length);
}
std::string StrUtils::UnicodeStringToUtf8(const std::wstring &str)
{
int length = WideCharToMultiByte(CP_UTF8, 0,
str.c_str(), str.size(), nullptr, 0, nullptr, nullptr);
if (length == 0) throw std::invalid_argument("Invalid character");
std::string result(length, '\0');
length = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(),
result.data(), result.size() + 1, nullptr, nullptr);
if (length == 0) throw std::invalid_argument("Invalid character");
return result;
}
unsigned int StrUtils::Utf8CharToUnicode(const std::string &ch)
{
if (ch.empty())
return 0;
wchar_t c = 0;
int length = MultiByteToWideChar(CP_UTF8, 0,
ch.c_str(), ch.size(), &c, 1);
if (length == 0) throw std::invalid_argument("Invalid character");
return c;
}
std::wstring StrUtils::Utf8StringToUnicode(const std::string &str)
{
int length = MultiByteToWideChar(CP_UTF8, 0,
str.c_str(), str.size(), nullptr, 0);
if (length == 0) throw std::invalid_argument("Invalid character");
std::wstring result(length, L'\0');
length = MultiByteToWideChar(CP_UTF8, 0,
str.c_str(), str.size(), result.data(), length);
if (length == 0) throw std::invalid_argument("Invalid character");
return result;
}
int StrUtils::Utf8CharSizeAt(const std::string &str, unsigned int pos)
{
if (pos >= str.size())
throw std::out_of_range("Index is greater than size");
std::array<wchar_t, 4> buffer;
int length = MultiByteToWideChar(CP_UTF8, 0,
str.data() + pos, min(4, str.size() - pos),
buffer.data(), buffer.size());
if (length == 0) throw std::invalid_argument("Invalid character");
length = WideCharToMultiByte(CP_UTF8, 0,
buffer.data(), 1, nullptr, 0, 0, 0);
if (length == 0) throw std::invalid_argument("Invalid character");
return length;
}
std::size_t StrUtils::Utf8StringLength(const std::string &str)
{
int length = MultiByteToWideChar(CP_UTF8, 0,
str.c_str(), str.size(), nullptr, 0);
if (length == 0) throw std::invalid_argument("Invalid character");
return length;
}
bool StrUtils::isUtf8ContinuationByte(char c)
{
return (c & 0b1100'0000) == 0b1000'0000;
}
std::string StrUtils::ToLower(const std::string& text)
{
std::wstring unicode = Utf8StringToUnicode(text);
CharLowerBuffW(unicode.data(), unicode.size());
return UnicodeStringToUtf8(unicode);
}
std::string StrUtils::ToUpper(const std::string& text)
{
std::wstring unicode = Utf8StringToUnicode(text);
CharUpperBuffW(unicode.data(), unicode.size());
return UnicodeStringToUtf8(unicode);
}