summary refs log tree commit diff
diff options
context:
space:
mode:
author liamwhite <liamwhite@users.noreply.github.com> 2023-10-07 12:49:27 -0400
committer GitHub <noreply@github.com> 2023-10-07 12:49:27 -0400
commit 0e9b839b6f5e10ea35ee3db79226fcc535c90064 (patch)
tree 08825c9526eaf9bd8f981ddbd84e44ad36f3a877
parent 15a5bdd9794536965c3c79535f93c951df86f439 (diff)
parent 38394f36d78f22ca75acf8275e86d25faecd0e8d (diff)
Merge pull request #11648 from liamwhite/unicode-nonsense
gdbserver: use numeric character references for unicode
-rw-r--r-- src/common/string_util.cpp 5
-rw-r--r-- src/common/string_util.h 1
-rw-r--r-- src/core/debugger/gdbstub.cpp 17
3 files changed, 21 insertions, 2 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index feab1653d..4c7aba3f5 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -135,6 +135,11 @@ std::u16string UTF8ToUTF16(std::string_view input) {
return convert.from_bytes(input.data(), input.data() + input.size());
}
+std::u32string UTF8ToUTF32(std::string_view input) { // Decodes the UTF-8 bytes of `input` into a UTF-32 string.
+ std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert; // Default-constructed: no fallback error string is supplied.
+ return convert.from_bytes(input.data(), input.data() + input.size()); // With no fallback string, from_bytes throws std::range_error on a failed conversion.
+}
+
#ifdef _WIN32
static std::wstring CPToUTF16(u32 code_page, std::string_view input) {
const auto size =
diff --git a/src/common/string_util.h b/src/common/string_util.h
index c351f1a0c..9da1ca4e9 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -38,6 +38,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
[[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input);
[[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input);
+[[nodiscard]] std::u32string UTF8ToUTF32(std::string_view input); // Converts UTF-8 encoded input into a UTF-32 string.
#ifdef _WIN32
[[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input);
diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp
index e55831f27..82964f0a1 100644
--- a/src/core/debugger/gdbstub.cpp
+++ b/src/core/debugger/gdbstub.cpp
@@ -2,6 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
+#include <codecvt>
+#include <locale>
#include <numeric>
#include <optional>
#include <thread>
@@ -12,6 +14,7 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
+#include "common/string_util.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/debugger/gdbstub.h"
@@ -68,10 +71,16 @@ static std::string EscapeGDB(std::string_view data) {
}
static std::string EscapeXML(std::string_view data) {
+ std::u32string converted = U"[Encoding error]";
+ try {
+ converted = Common::UTF8ToUTF32(data);
+ } catch (std::range_error&) {
+ }
+
std::string escaped;
escaped.reserve(data.size());
- for (char c : data) {
+ for (char32_t c : converted) {
switch (c) {
case '&':
escaped += "&amp;";
@@ -86,7 +95,11 @@ static std::string EscapeXML(std::string_view data) {
escaped += "&gt;";
break;
default:
- escaped += c;
+ if (c > 0x7f) {
+ escaped += fmt::format("&#{};", static_cast<u32>(c));
+ } else {
+ escaped += static_cast<char>(c);
+ }
break;
}
}