summaryrefslogtreecommitdiff
path: root/src/common/string_util.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/string_util.cpp')
-rw-r--r--src/common/string_util.cpp190
1 files changed, 126 insertions, 64 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 61f0939c4..6d9612fb5 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -2,13 +2,14 @@
// Licensed under GPLv2
// Refer to the license.txt file included.
-#include <algorithm>
+#include <boost/range/algorithm.hpp>
#include "common/common.h"
#include "common/string_util.h"
#ifdef _WIN32
#include <Windows.h>
+ #include <codecvt>
#else
#include <iconv.h>
#endif
@@ -17,20 +18,20 @@ namespace Common {
/// Make a string lowercase
std::string ToLower(std::string str) {
- std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+ // ::tolower has undefined behaviour for negative arguments other than EOF, so
+ // each byte is passed as unsigned char (bytes >= 0x80 are negative as plain char).
+ boost::transform(str, str.begin(), [](unsigned char c) { return static_cast<char>(::tolower(c)); });
return str;
}
/// Make a string uppercase
std::string ToUpper(std::string str) {
- std::transform(str.begin(), str.end(), str.begin(), ::toupper);
+ // ::toupper has undefined behaviour for negative arguments other than EOF, so
+ // each byte is passed as unsigned char (bytes >= 0x80 are negative as plain char).
+ boost::transform(str, str.begin(), [](unsigned char c) { return static_cast<char>(::toupper(c)); });
return str;
}
// faster than sscanf
bool AsciiToHex(const char* _szValue, u32& result)
{
- char *endptr = NULL;
+ char *endptr = nullptr;
const u32 value = strtoul(_szValue, &endptr, 16);
if (!endptr || *endptr)
@@ -68,7 +69,7 @@ bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list ar
// will be present in the middle of a multibyte sequence.
//
// This is why we lookup an ANSI (cp1252) locale here and use _vsnprintf_l.
- static locale_t c_locale = NULL;
+ static locale_t c_locale = nullptr;
if (!c_locale)
c_locale = _create_locale(LC_ALL, ".1252");
writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
@@ -91,7 +92,7 @@ bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list ar
std::string StringFromFormat(const char* format, ...)
{
va_list args;
- char *buf = NULL;
+ char *buf = nullptr;
#ifdef _WIN32
int required = 0;
@@ -106,7 +107,7 @@ std::string StringFromFormat(const char* format, ...)
#else
va_start(args, format);
if (vasprintf(&buf, format, args) < 0)
- ERROR_LOG(COMMON, "Unable to allocate memory for string");
+ LOG_ERROR(Common, "Unable to allocate memory for string");
va_end(args);
std::string temp = buf;
@@ -120,11 +121,11 @@ std::string ArrayToString(const u8 *data, u32 size, int line_len, bool spaces)
{
std::ostringstream oss;
oss << std::setfill('0') << std::hex;
-
+
for (int line = 0; size; ++data, --size)
{
oss << std::setw(2) << (int)*data;
-
+
if (line_len == ++line)
{
oss << '\n';
@@ -161,13 +162,13 @@ std::string StripQuotes(const std::string& s)
bool TryParse(const std::string &str, u32 *const output)
{
- char *endptr = NULL;
+ char *endptr = nullptr;
// Reset errno to a value other than ERANGE
errno = 0;
unsigned long value = strtoul(str.c_str(), &endptr, 0);
-
+
if (!endptr || *endptr)
return false;
@@ -293,7 +294,7 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
//#include <string>
//#include <assert.h>
-const char HEX2DEC[256] =
+const char HEX2DEC[256] =
{
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
/* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
@@ -326,7 +327,7 @@ std::string UriDecode(const std::string & sSrc)
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
const size_t SRC_LEN = sSrc.length();
const unsigned char * const SRC_END = pSrc + SRC_LEN;
- const unsigned char * const SRC_LAST_DEC = SRC_END - 2; // last decodable '%'
+ const unsigned char * const SRC_LAST_DEC = SRC_END - 2; // last decodable '%'
char * const pStart = new char[SRC_LEN];
char * pEnd = pStart;
@@ -393,7 +394,7 @@ std::string UriEncode(const std::string & sSrc)
for (; pSrc < SRC_END; ++pSrc)
{
- if (SAFE[*pSrc])
+ if (SAFE[*pSrc])
*pEnd++ = *pSrc;
else
{
@@ -411,7 +412,19 @@ std::string UriEncode(const std::string & sSrc)
#ifdef _WIN32
-std::string UTF16ToUTF8(const std::wstring& input)
+std::string UTF16ToUTF8(const std::u16string& input) // _WIN32 branch: re-encodes UTF-16 code units as UTF-8 bytes
+{
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; // standard <codecvt> facet converting between UTF-8 and UTF-16
+ return convert.to_bytes(input); // NOTE(review): no error string is supplied, so to_bytes throws std::range_error on malformed input - confirm callers expect that
+}
+
+std::u16string UTF8ToUTF16(const std::string& input) // _WIN32 branch: decodes UTF-8 bytes into UTF-16 code units
+{
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; // standard <codecvt> facet converting between UTF-8 and UTF-16
+ return convert.from_bytes(input); // NOTE(review): no error string is supplied, so from_bytes throws std::range_error on malformed input - confirm callers expect that
+}
+
+static std::string UTF16ToUTF8(const std::wstring& input)
{
auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr);
@@ -424,7 +437,7 @@ std::string UTF16ToUTF8(const std::wstring& input)
return output;
}
-std::wstring CPToUTF16(u32 code_page, const std::string& input)
+static std::wstring CPToUTF16(u32 code_page, const std::string& input)
{
auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0);
@@ -437,7 +450,7 @@ std::wstring CPToUTF16(u32 code_page, const std::string& input)
return output;
}
-std::wstring UTF8ToUTF16(const std::string& input)
+std::wstring UTF8ToUTF16W(const std::string &input) // wstring-returning variant; presumably renamed because this patch gives the UTF8ToUTF16 name to the u16string version
{
return CPToUTF16(CP_UTF8, input); // interprets input using the CP_UTF8 code page
}
@@ -455,61 +468,123 @@ std::string CP1252ToUTF8(const std::string& input)
#else
+/// Converts `input`, encoded as `fromcode` (an iconv encoding name), to UTF-8.
+/// Returns an empty string when the iconv descriptor cannot be opened. Input
+/// units that iconv rejects (EILSEQ / EINVAL) are skipped; any other iconv
+/// error is logged and whatever was converted so far is returned.
template <typename T>
-std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
+static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
{
std::string result;
iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
- if ((iconv_t)-1 == conv_desc)
+ if ((iconv_t)(-1) == conv_desc)
{
- ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
+ LOG_ERROR(Common, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
+ // iconv_open failed, so conv_desc is not a valid descriptor and must not be closed.
+ return {};
}
- else
- {
- size_t const in_bytes = sizeof(T) * input.size();
- size_t const out_buffer_size = 4 * in_bytes;
- std::string out_buffer;
- out_buffer.resize(out_buffer_size);
+ const size_t in_bytes = sizeof(T) * input.size();
+ // Multiply by 4, which is the max number of bytes to encode a codepoint
+ const size_t out_buffer_size = 4 * in_bytes;
- auto src_buffer = &input[0];
- size_t src_bytes = in_bytes;
- auto dst_buffer = &out_buffer[0];
- size_t dst_bytes = out_buffer.size();
+ std::string out_buffer;
+ out_buffer.resize(out_buffer_size);
- while (src_bytes != 0)
- {
- size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
- &dst_buffer, &dst_bytes);
+ auto src_buffer = &input[0];
+ size_t src_bytes = in_bytes;
+ auto dst_buffer = &out_buffer[0];
+ size_t dst_bytes = out_buffer.size();
- if ((size_t)-1 == iconv_result)
+ while (0 != src_bytes)
+ {
+ size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
+ &dst_buffer, &dst_bytes);
+
+ if (static_cast<size_t>(-1) == iconv_result)
+ {
+ if (EILSEQ == errno || EINVAL == errno)
{
- if (EILSEQ == errno || EINVAL == errno)
- {
- // Try to skip the bad character
- if (src_bytes != 0)
- {
- --src_bytes;
- ++src_buffer;
- }
- }
- else
+ // Try to skip the bad character
+ if (0 != src_bytes)
{
- ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
- break;
+ // src_buffer is a const T*, so ++ moves it sizeof(T) bytes; shrink the
+ // byte count by the same amount (clamped) to keep pointer and count in sync.
+ src_bytes -= (src_bytes < sizeof(T)) ? src_bytes : sizeof(T);
+ ++src_buffer;
}
}
+ else
+ {
+ LOG_ERROR(Common, "iconv failure [%s]: %s", fromcode, strerror(errno));
+ break;
+ }
}
+ }
+
+ // dst_bytes is the unused tail of the output buffer; trim it off.
+ out_buffer.resize(out_buffer_size - dst_bytes);
+ out_buffer.swap(result);
+
+ iconv_close(conv_desc);
+
+ return result;
+}
+
+/// Converts UTF-8 `input` to little-endian UTF-16 using iconv.
+/// Returns an empty string when the iconv descriptor cannot be opened. Bytes
+/// that iconv rejects (EILSEQ / EINVAL) are skipped; any other iconv error is
+/// logged and whatever was converted so far is returned.
+std::u16string UTF8ToUTF16(const std::string& input)
+{
+ std::u16string result;
- out_buffer.resize(out_buffer_size - dst_bytes);
- out_buffer.swap(result);
-
+ iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8");
+ if ((iconv_t)(-1) == conv_desc)
+ {
+ LOG_ERROR(Common, "Iconv initialization failure [UTF-8]: %s", strerror(errno));
- iconv_close(conv_desc);
+ // iconv_open failed, so conv_desc is not a valid descriptor and must not be closed.
+ return {};
}
-
+
+ const size_t in_bytes = sizeof(char) * input.size();
+ // A UTF-8 sequence of 1-3 bytes yields one UTF-16 code unit and a 4-byte
+ // sequence yields two, so in_bytes code units is always enough room.
+ const size_t out_units = in_bytes;
+ // iconv measures the output space in bytes, not in char16_t elements.
+ const size_t out_buffer_size = sizeof(char16_t) * out_units;
+
+ std::u16string out_buffer;
+ out_buffer.resize(out_units);
+
+ char* src_buffer = const_cast<char*>(&input[0]);
+ size_t src_bytes = in_bytes;
+ char* dst_buffer = (char*)(&out_buffer[0]);
+ size_t dst_bytes = out_buffer_size;
+
+ while (0 != src_bytes)
+ {
+ size_t const iconv_result = iconv(conv_desc, &src_buffer, &src_bytes,
+ &dst_buffer, &dst_bytes);
+
+ if (static_cast<size_t>(-1) == iconv_result)
+ {
+ if (EILSEQ == errno || EINVAL == errno)
+ {
+ // Try to skip the bad character
+ if (0 != src_bytes)
+ {
+ --src_bytes;
+ ++src_buffer;
+ }
+ }
+ else
+ {
+ LOG_ERROR(Common, "iconv failure [UTF-8]: %s", strerror(errno));
+ break;
+ }
+ }
+ }
+
+ // Convert the number of bytes written back into a count of code units.
+ out_buffer.resize((out_buffer_size - dst_bytes) / sizeof(char16_t));
+ out_buffer.swap(result);
+
+ iconv_close(conv_desc);
+
return result;
}
+std::string UTF16ToUTF8(const std::u16string& input) // non-_WIN32 branch: delegates to the iconv-based CodeToUTF8 helper
+{
+ return CodeToUTF8("UTF-16LE", input); // source encoding is fixed to little-endian UTF-16
+}
+
std::string CP1252ToUTF8(const std::string& input)
{
//return CodeToUTF8("CP1252//TRANSLIT", input);
@@ -523,19 +598,6 @@ std::string SHIFTJISToUTF8(const std::string& input)
return CodeToUTF8("SJIS", input);
}
-std::string UTF16ToUTF8(const std::wstring& input)
-{
- std::string result =
- // CodeToUTF8("UCS-2", input);
- // CodeToUTF8("UCS-2LE", input);
- // CodeToUTF8("UTF-16", input);
- CodeToUTF8("UTF-16LE", input);
-
- // TODO: why is this needed?
- result.erase(std::remove(result.begin(), result.end(), 0x00), result.end());
- return result;
-}
-
#endif
}