1
0
Fork 0

Merge pull request #98395 from Repiteo/core/char-range-utils-update

Core: Update `char_range.inc` to Unicode 16
This commit is contained in:
Thaddeus Crews 2024-10-25 13:04:07 -05:00
commit d3298fe738
No known key found for this signature in database
GPG Key ID: 62181B86FE9E5D84
2 changed files with 442 additions and 237 deletions

File diff suppressed because it is too large Load Diff

View File

@ -38,97 +38,97 @@
#define BSEARCH_CHAR_RANGE(m_array) \ #define BSEARCH_CHAR_RANGE(m_array) \
int low = 0; \ int low = 0; \
int high = sizeof(m_array) / sizeof(m_array[0]) - 1; \ int high = sizeof(m_array) / sizeof(m_array[0]) - 1; \
int middle; \ int middle = (low + high) / 2; \
\ \
while (low <= high) { \ while (low <= high) { \
middle = (low + high) / 2; \ if (p_char < m_array[middle].start) { \
\
if (c < m_array[middle].start) { \
high = middle - 1; \ high = middle - 1; \
} else if (c > m_array[middle].end) { \ } else if (p_char > m_array[middle].end) { \
low = middle + 1; \ low = middle + 1; \
} else { \ } else { \
return true; \ return true; \
} \ } \
\
middle = (low + high) / 2; \
} \ } \
\ \
return false return false
static _FORCE_INLINE_ bool is_unicode_identifier_start(char32_t c) { constexpr bool is_unicode_identifier_start(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_start); BSEARCH_CHAR_RANGE(xid_start);
} }
static _FORCE_INLINE_ bool is_unicode_identifier_continue(char32_t c) { constexpr bool is_unicode_identifier_continue(char32_t p_char) {
BSEARCH_CHAR_RANGE(xid_continue); BSEARCH_CHAR_RANGE(xid_continue);
} }
static _FORCE_INLINE_ bool is_unicode_upper_case(char32_t c) { constexpr bool is_unicode_upper_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(uppercase_letter); BSEARCH_CHAR_RANGE(uppercase_letter);
} }
static _FORCE_INLINE_ bool is_unicode_lower_case(char32_t c) { constexpr bool is_unicode_lower_case(char32_t p_char) {
BSEARCH_CHAR_RANGE(lowercase_letter); BSEARCH_CHAR_RANGE(lowercase_letter);
} }
static _FORCE_INLINE_ bool is_unicode_letter(char32_t c) { constexpr bool is_unicode_letter(char32_t p_char) {
BSEARCH_CHAR_RANGE(unicode_letter); BSEARCH_CHAR_RANGE(unicode_letter);
} }
#undef BSEARCH_CHAR_RANGE #undef BSEARCH_CHAR_RANGE
static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { constexpr bool is_ascii_upper_case(char32_t p_char) {
return (c >= 'A' && c <= 'Z'); return (p_char >= 'A' && p_char <= 'Z');
} }
static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { constexpr bool is_ascii_lower_case(char32_t p_char) {
return (c >= 'a' && c <= 'z'); return (p_char >= 'a' && p_char <= 'z');
} }
static _FORCE_INLINE_ bool is_digit(char32_t c) { constexpr bool is_digit(char32_t p_char) {
return (c >= '0' && c <= '9'); return (p_char >= '0' && p_char <= '9');
} }
static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { constexpr bool is_hex_digit(char32_t p_char) {
return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); return (is_digit(p_char) || (p_char >= 'a' && p_char <= 'f') || (p_char >= 'A' && p_char <= 'F'));
} }
static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { constexpr bool is_binary_digit(char32_t p_char) {
return (c == '0' || c == '1'); return (p_char == '0' || p_char == '1');
} }
static _FORCE_INLINE_ bool is_ascii_alphabet_char(char32_t c) { constexpr bool is_ascii_alphabet_char(char32_t p_char) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z');
} }
static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { constexpr bool is_ascii_alphanumeric_char(char32_t p_char) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z') || (p_char >= '0' && p_char <= '9');
} }
static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { constexpr bool is_ascii_identifier_char(char32_t p_char) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; return (p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z') || (p_char >= '0' && p_char <= '9') || p_char == '_';
} }
static _FORCE_INLINE_ bool is_symbol(char32_t c) { constexpr bool is_symbol(char32_t p_char) {
return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); return p_char != '_' && ((p_char >= '!' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '`') || (p_char >= '{' && p_char <= '~') || p_char == '\t' || p_char == ' ');
} }
static _FORCE_INLINE_ bool is_control(char32_t p_char) { constexpr bool is_control(char32_t p_char) {
return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f); return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f);
} }
static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { constexpr bool is_whitespace(char32_t p_char) {
return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085); return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085);
} }
static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { constexpr bool is_linebreak(char32_t p_char) {
return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029); return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029);
} }
static _FORCE_INLINE_ bool is_punct(char32_t p_char) { constexpr bool is_punct(char32_t p_char) {
return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f); return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f);
} }
static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { constexpr bool is_underscore(char32_t p_char) {
return (p_char == '_'); return (p_char == '_');
} }