mirror of https://github.com/godotengine/godot
RegEx: Fix handling of unset/unknown capture groups
This commit is contained in:
parent
893bbdfde8
commit
0339032969
|
|
@ -289,25 +289,17 @@ TypedArray<RegExMatch> RegEx::search_all(const String &p_subject, int p_offset,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_all, int p_offset, int p_end) const {
|
int RegEx::_sub(const String &p_subject, const String &p_replacement, int p_offset, int p_end, uint32_t p_flags, String &r_output) const {
|
||||||
ERR_FAIL_COND_V(!is_valid(), String());
|
// `safety_zone` is the number of chars we allocate in addition to the number of chars expected in order to
|
||||||
ERR_FAIL_COND_V_MSG(p_offset < 0, String(), "RegEx sub offset must be >= 0");
|
// guard against the PCRE API writing one additional `\0` at the end. PCRE's API docs are unclear on whether
|
||||||
|
// PCRE understands outlength in `pcre2_substitute()` as counting an implicit additional terminating char or
|
||||||
// safety_zone is the number of chars we allocate in addition to the number of chars expected in order to
|
// not. Always allocating one char more than telling PCRE has us on the safe side.
|
||||||
// guard against the PCRE API writing one additional \0 at the end. PCRE's API docs are unclear on whether
|
|
||||||
// PCRE understands outlength in pcre2_substitute() as counting an implicit additional terminating char or
|
|
||||||
// not. always allocating one char more than telling PCRE has us on the safe side.
|
|
||||||
const int safety_zone = 1;
|
const int safety_zone = 1;
|
||||||
|
|
||||||
PCRE2_SIZE olength = p_subject.length() + 1; // space for output string and one terminating \0 character
|
PCRE2_SIZE olength = p_subject.length() + 1; // Space for output string and one terminating `\0` character.
|
||||||
Vector<char32_t> output;
|
Vector<char32_t> output;
|
||||||
output.resize(olength + safety_zone);
|
output.resize(olength + safety_zone);
|
||||||
|
|
||||||
uint32_t flags = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
|
|
||||||
if (p_all) {
|
|
||||||
flags |= PCRE2_SUBSTITUTE_GLOBAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
PCRE2_SIZE length = p_subject.length();
|
PCRE2_SIZE length = p_subject.length();
|
||||||
if (p_end >= 0 && (uint32_t)p_end < length) {
|
if (p_end >= 0 && (uint32_t)p_end < length) {
|
||||||
length = p_end;
|
length = p_end;
|
||||||
|
|
@ -322,22 +314,49 @@ String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_a
|
||||||
|
|
||||||
pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx);
|
pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx);
|
||||||
|
|
||||||
int res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength);
|
int res = pcre2_substitute_32(c, s, length, p_offset, p_flags, match, mctx, r, p_replacement.length(), o, &olength);
|
||||||
|
|
||||||
if (res == PCRE2_ERROR_NOMEMORY) {
|
if (res == PCRE2_ERROR_NOMEMORY) {
|
||||||
output.resize(olength + safety_zone);
|
output.resize(olength + safety_zone);
|
||||||
o = (PCRE2_UCHAR32 *)output.ptrw();
|
o = (PCRE2_UCHAR32 *)output.ptrw();
|
||||||
res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength);
|
res = pcre2_substitute_32(c, s, length, p_offset, p_flags, match, mctx, r, p_replacement.length(), o, &olength);
|
||||||
}
|
}
|
||||||
|
|
||||||
pcre2_match_data_free_32(match);
|
pcre2_match_data_free_32(match);
|
||||||
pcre2_match_context_free_32(mctx);
|
pcre2_match_context_free_32(mctx);
|
||||||
|
|
||||||
if (res < 0) {
|
if (res >= 0) {
|
||||||
return String();
|
r_output = String(output.ptr(), olength) + p_subject.substr(length);
|
||||||
}
|
}
|
||||||
|
|
||||||
return String(output.ptr(), olength) + p_subject.substr(length);
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_all, int p_offset, int p_end) const {
|
||||||
|
ERR_FAIL_COND_V(!is_valid(), String());
|
||||||
|
ERR_FAIL_COND_V_MSG(p_offset < 0, String(), "RegEx sub offset must be >= 0");
|
||||||
|
|
||||||
|
uint32_t flags = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_UNSET_EMPTY;
|
||||||
|
if (p_all) {
|
||||||
|
flags |= PCRE2_SUBSTITUTE_GLOBAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
String output;
|
||||||
|
const int res = _sub(p_subject, p_replacement, p_offset, p_end, flags, output);
|
||||||
|
|
||||||
|
if (res < 0) {
|
||||||
|
PCRE2_UCHAR32 buf[256];
|
||||||
|
pcre2_get_error_message_32(res, buf, 256);
|
||||||
|
String message = "PCRE2 Error: " + String((const char32_t *)buf);
|
||||||
|
ERR_PRINT(message.utf8());
|
||||||
|
|
||||||
|
if (res == PCRE2_ERROR_NOSUBSTRING) {
|
||||||
|
flags |= PCRE2_SUBSTITUTE_UNKNOWN_UNSET;
|
||||||
|
_sub(p_subject, p_replacement, p_offset, p_end, flags, output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RegEx::is_valid() const {
|
bool RegEx::is_valid() const {
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,8 @@ class RegEx : public RefCounted {
|
||||||
|
|
||||||
void _pattern_info(uint32_t what, void *where) const;
|
void _pattern_info(uint32_t what, void *where) const;
|
||||||
|
|
||||||
|
int _sub(const String &p_subject, const String &p_replacement, int p_offset, int p_end, uint32_t p_flags, String &r_output) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
static void _bind_methods();
|
static void _bind_methods();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -145,6 +145,15 @@ TEST_CASE("[RegEx] Substitution") {
|
||||||
CHECK(re5.sub(s5, "cc", true, 0, 2) == "ccccaa");
|
CHECK(re5.sub(s5, "cc", true, 0, 2) == "ccccaa");
|
||||||
CHECK(re5.sub(s5, "cc", true, 1, 3) == "acccca");
|
CHECK(re5.sub(s5, "cc", true, 1, 3) == "acccca");
|
||||||
CHECK(re5.sub(s5, "", true, 0, 2) == "aa");
|
CHECK(re5.sub(s5, "", true, 0, 2) == "aa");
|
||||||
|
|
||||||
|
const String s6 = "property get_property set_property";
|
||||||
|
|
||||||
|
RegEx re6("(get_|set_)?property");
|
||||||
|
REQUIRE(re6.is_valid());
|
||||||
|
CHECK(re6.sub(s6, "$1new_property", true) == "new_property get_new_property set_new_property");
|
||||||
|
ERR_PRINT_OFF;
|
||||||
|
CHECK(re6.sub(s6, "$5new_property", true) == "new_property new_property new_property");
|
||||||
|
ERR_PRINT_ON;
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("[RegEx] Substitution with empty input and/or replacement") {
|
TEST_CASE("[RegEx] Substitution with empty input and/or replacement") {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue