1
0
Fork 0

Add `_verified_utf32_char` to `ustring.cpp`, to reduce duplicate logic.

This commit is contained in:
Lukas Tenbrink 2025-01-02 17:09:46 +01:00
parent 2582793d40
commit 0765657a0e
2 changed files with 27 additions and 45 deletions

View File

@ -327,6 +327,25 @@ void String::parse_utf32(const StrRange<char32_t> &p_cstr) {
copy_from_unchecked(p_cstr.c_str, p_cstr.len);
}
// Checks a single UTF-32 value before it is written into a String buffer.
//
// p_char:  the candidate code point.
// failure: optional out-flag; when non-null it is set to `true` if p_char is
//          rejected. It is never written on the success path, so callers can
//          accumulate failures across many calls with one flag.
//
// Returns p_char unchanged when it is acceptable. Otherwise an error is
// reported through String::print_unicode_error (flagged as critical) and
// String::_replacement_char is returned in its place.
constexpr char32_t _verified_utf32_char(char32_t p_char, bool *failure = nullptr) {
	// Matches the surrogate block 0xD800..0xDFFF.
	const bool is_surrogate = (p_char & 0xfffff800) == 0xd800;

	// Accept anything that is neither a surrogate nor above 0x10FFFF.
	if (!is_surrogate && p_char <= 0x10ffff) {
		return p_char;
	}

	// The two rejection reasons are mutually exclusive; report whichever applies.
	if (is_surrogate) {
		String::print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char), true);
	} else {
		String::print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char), true);
	}

	if (failure != nullptr) {
		*failure = true;
	}
	return String::_replacement_char;
}
void String::parse_utf32(const char32_t &p_char) {
if (p_char == 0) {
print_unicode_error("NUL character", true);
@ -336,17 +355,7 @@ void String::parse_utf32(const char32_t &p_char) {
resize(2);
char32_t *dst = ptrw();
if ((p_char & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
dst[0] = _replacement_char;
} else if (p_char > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
dst[0] = _replacement_char;
} else {
dst[0] = p_char;
}
dst[0] = _verified_utf32_char(p_char);
dst[1] = 0;
}
@ -361,18 +370,7 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
char32_t *dst = ptrw();
for (; p_char < end; ++p_char, ++dst) {
const char32_t chr = *p_char;
if ((chr & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr));
*dst = _replacement_char;
continue;
}
if (chr > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr));
*dst = _replacement_char;
continue;
}
*dst = chr;
*dst = _verified_utf32_char(*p_char);
}
*dst = 0;
}
@ -490,16 +488,7 @@ String &String::operator+=(char32_t p_char) {
resize(lhs_len + 2);
char32_t *dst = ptrw();
if ((p_char & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
dst[lhs_len] = _replacement_char;
} else if (p_char > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
dst[lhs_len] = _replacement_char;
} else {
dst[lhs_len] = p_char;
}
dst[lhs_len] = _verified_utf32_char(p_char);
dst[lhs_len + 1] = 0;
return *this;
@ -1923,7 +1912,7 @@ Vector<uint8_t> String::hex_decode() const {
#undef HEX_TO_BYTE
}
void String::print_unicode_error(const String &p_message, bool p_critical) const {
void String::print_unicode_error(const String &p_message, bool p_critical) {
if (p_critical) {
print_error(vformat(U"Unicode parsing error, some characters were replaced with <20> (U+FFFD): %s", p_message));
} else {
@ -2111,16 +2100,8 @@ Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
print_unicode_error("NUL character", true);
decode_failed = true;
unichar = _replacement_char;
} else if ((unichar & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", unichar), true);
decode_failed = true;
unichar = _replacement_char;
} else if (unichar > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar), true);
decode_failed = true;
unichar = _replacement_char;
}
*(dst++) = unichar;
*(dst++) = _verified_utf32_char(unichar, &decode_failed);
}
}
}

View File

@ -264,7 +264,6 @@ protected:
class String {
CowData<char32_t> _cowdata;
static const char32_t _null;
static const char32_t _replacement_char;
// Known-length copy.
void parse_latin1(const StrRange<char> &p_cstr);
@ -321,6 +320,8 @@ class String {
String _camelcase_to_underscore() const;
public:
static const char32_t _replacement_char;
enum {
npos = -1 ///<for "some" compatibility with std::string (npos is a huge value in std::string)
};
@ -396,7 +397,7 @@ public:
bool is_valid_string() const;
/* debug, error messages */
void print_unicode_error(const String &p_message, bool p_critical = false) const;
static void print_unicode_error(const String &p_message, bool p_critical = false);
/* complex helpers */
String substr(int p_from, int p_chars = -1) const;