1
0
Fork 0

Merge pull request #115121 from Chubercik/unicode_17.0.0

Update various Unicode-related files to version 17.0.0
This commit is contained in:
Thaddeus Crews 2026-01-28 12:27:08 -06:00
commit 64b962215d
No known key found for this signature in database
GPG Key ID: 8C6E5FEB5FC03CCC
6 changed files with 149 additions and 62 deletions

View File

@ -35,7 +35,7 @@
#include "core/typedefs.h"
// Unicode Derived Core Properties
// Source: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt
// Source: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt
struct CharRange {
char32_t start;
@ -94,7 +94,7 @@ constexpr inline CharRange xid_start[] = {
{ 0x840, 0x858 },
{ 0x860, 0x86a },
{ 0x870, 0x887 },
{ 0x889, 0x88e },
{ 0x889, 0x88f },
{ 0x8a0, 0x8c9 },
{ 0x904, 0x939 },
{ 0x93d, 0x93d },
@ -160,7 +160,7 @@ constexpr inline CharRange xid_start[] = {
{ 0xc2a, 0xc39 },
{ 0xc3d, 0xc3d },
{ 0xc58, 0xc5a },
{ 0xc5d, 0xc5d },
{ 0xc5c, 0xc5d },
{ 0xc60, 0xc61 },
{ 0xc80, 0xc80 },
{ 0xc85, 0xc8c },
@ -169,7 +169,7 @@ constexpr inline CharRange xid_start[] = {
{ 0xcaa, 0xcb3 },
{ 0xcb5, 0xcb9 },
{ 0xcbd, 0xcbd },
{ 0xcdd, 0xcde },
{ 0xcdc, 0xcde },
{ 0xce0, 0xce1 },
{ 0xcf1, 0xcf2 },
{ 0xd04, 0xd0c },
@ -351,11 +351,8 @@ constexpr inline CharRange xid_start[] = {
{ 0xa6a0, 0xa6ef },
{ 0xa717, 0xa71f },
{ 0xa722, 0xa788 },
{ 0xa78b, 0xa7cd },
{ 0xa7d0, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
{ 0xa7d5, 0xa7dc },
{ 0xa7f2, 0xa801 },
{ 0xa78b, 0xa7dc },
{ 0xa7f1, 0xa801 },
{ 0xa803, 0xa805 },
{ 0xa807, 0xa80a },
{ 0xa80c, 0xa822 },
@ -478,6 +475,7 @@ constexpr inline CharRange xid_start[] = {
{ 0x108f4, 0x108f5 },
{ 0x10900, 0x10915 },
{ 0x10920, 0x10939 },
{ 0x10940, 0x10959 },
{ 0x10980, 0x109b7 },
{ 0x109be, 0x109bf },
{ 0x10a00, 0x10a00 },
@ -500,7 +498,7 @@ constexpr inline CharRange xid_start[] = {
{ 0x10d6f, 0x10d85 },
{ 0x10e80, 0x10ea9 },
{ 0x10eb0, 0x10eb1 },
{ 0x10ec2, 0x10ec4 },
{ 0x10ec2, 0x10ec7 },
{ 0x10f00, 0x10f1c },
{ 0x10f27, 0x10f27 },
{ 0x10f30, 0x10f45 },
@ -593,6 +591,7 @@ constexpr inline CharRange xid_start[] = {
{ 0x11d67, 0x11d68 },
{ 0x11d6a, 0x11d89 },
{ 0x11d98, 0x11d98 },
{ 0x11db0, 0x11ddb },
{ 0x11ee0, 0x11ef2 },
{ 0x11f02, 0x11f02 },
{ 0x11f04, 0x11f10 },
@ -617,14 +616,17 @@ constexpr inline CharRange xid_start[] = {
{ 0x16b7d, 0x16b8f },
{ 0x16d40, 0x16d6c },
{ 0x16e40, 0x16e7f },
{ 0x16ea0, 0x16eb8 },
{ 0x16ebb, 0x16ed3 },
{ 0x16f00, 0x16f4a },
{ 0x16f50, 0x16f50 },
{ 0x16f93, 0x16f9f },
{ 0x16fe0, 0x16fe1 },
{ 0x16fe3, 0x16fe3 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 },
{ 0x18cff, 0x18d08 },
{ 0x16ff2, 0x16ff6 },
{ 0x17000, 0x18cd5 },
{ 0x18cff, 0x18d1e },
{ 0x18d80, 0x18df2 },
{ 0x1aff0, 0x1aff3 },
{ 0x1aff5, 0x1affb },
{ 0x1affd, 0x1affe },
@ -679,6 +681,12 @@ constexpr inline CharRange xid_start[] = {
{ 0x1e4d0, 0x1e4eb },
{ 0x1e5d0, 0x1e5ed },
{ 0x1e5f0, 0x1e5f0 },
{ 0x1e6c0, 0x1e6de },
{ 0x1e6e0, 0x1e6e2 },
{ 0x1e6e4, 0x1e6e5 },
{ 0x1e6e7, 0x1e6ed },
{ 0x1e6f0, 0x1e6f4 },
{ 0x1e6fe, 0x1e6ff },
{ 0x1e7e0, 0x1e7e6 },
{ 0x1e7e8, 0x1e7eb },
{ 0x1e7ed, 0x1e7ee },
@ -720,14 +728,13 @@ constexpr inline CharRange xid_start[] = {
{ 0x1eea5, 0x1eea9 },
{ 0x1eeab, 0x1eebb },
{ 0x20000, 0x2a6df },
{ 0x2a700, 0x2b739 },
{ 0x2b740, 0x2b81d },
{ 0x2b820, 0x2cea1 },
{ 0x2a700, 0x2b81d },
{ 0x2b820, 0x2cead },
{ 0x2ceb0, 0x2ebe0 },
{ 0x2ebf0, 0x2ee5d },
{ 0x2f800, 0x2fa1d },
{ 0x30000, 0x3134a },
{ 0x31350, 0x323af },
{ 0x31350, 0x33479 },
};
constexpr inline CharRange xid_continue[] = {
@ -783,7 +790,7 @@ constexpr inline CharRange xid_continue[] = {
{ 0x840, 0x85b },
{ 0x860, 0x86a },
{ 0x870, 0x887 },
{ 0x889, 0x88e },
{ 0x889, 0x88f },
{ 0x897, 0x8e1 },
{ 0x8e3, 0x963 },
{ 0x966, 0x96f },
@ -873,7 +880,7 @@ constexpr inline CharRange xid_continue[] = {
{ 0xc4a, 0xc4d },
{ 0xc55, 0xc56 },
{ 0xc58, 0xc5a },
{ 0xc5d, 0xc5d },
{ 0xc5c, 0xc5d },
{ 0xc60, 0xc63 },
{ 0xc66, 0xc6f },
{ 0xc80, 0xc83 },
@ -886,7 +893,7 @@ constexpr inline CharRange xid_continue[] = {
{ 0xcc6, 0xcc8 },
{ 0xcca, 0xccd },
{ 0xcd5, 0xcd6 },
{ 0xcdd, 0xcde },
{ 0xcdc, 0xcde },
{ 0xce0, 0xce3 },
{ 0xce6, 0xcef },
{ 0xcf1, 0xcf3 },
@ -999,7 +1006,8 @@ constexpr inline CharRange xid_continue[] = {
{ 0x1a90, 0x1a99 },
{ 0x1aa7, 0x1aa7 },
{ 0x1ab0, 0x1abd },
{ 0x1abf, 0x1ace },
{ 0x1abf, 0x1add },
{ 0x1ae0, 0x1aeb },
{ 0x1b00, 0x1b4c },
{ 0x1b50, 0x1b59 },
{ 0x1b6b, 0x1b73 },
@ -1092,11 +1100,8 @@ constexpr inline CharRange xid_continue[] = {
{ 0xa67f, 0xa6f1 },
{ 0xa717, 0xa71f },
{ 0xa722, 0xa788 },
{ 0xa78b, 0xa7cd },
{ 0xa7d0, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
{ 0xa7d5, 0xa7dc },
{ 0xa7f2, 0xa827 },
{ 0xa78b, 0xa7dc },
{ 0xa7f1, 0xa827 },
{ 0xa82c, 0xa82c },
{ 0xa840, 0xa873 },
{ 0xa880, 0xa8c5 },
@ -1218,6 +1223,7 @@ constexpr inline CharRange xid_continue[] = {
{ 0x108f4, 0x108f5 },
{ 0x10900, 0x10915 },
{ 0x10920, 0x10939 },
{ 0x10940, 0x10959 },
{ 0x10980, 0x109b7 },
{ 0x109be, 0x109bf },
{ 0x10a00, 0x10a03 },
@ -1246,8 +1252,8 @@ constexpr inline CharRange xid_continue[] = {
{ 0x10e80, 0x10ea9 },
{ 0x10eab, 0x10eac },
{ 0x10eb0, 0x10eb1 },
{ 0x10ec2, 0x10ec4 },
{ 0x10efc, 0x10f1c },
{ 0x10ec2, 0x10ec7 },
{ 0x10efa, 0x10f1c },
{ 0x10f27, 0x10f27 },
{ 0x10f30, 0x10f50 },
{ 0x10f70, 0x10f85 },
@ -1341,6 +1347,7 @@ constexpr inline CharRange xid_continue[] = {
{ 0x11a50, 0x11a99 },
{ 0x11a9d, 0x11a9d },
{ 0x11ab0, 0x11af8 },
{ 0x11b60, 0x11b67 },
{ 0x11bc0, 0x11be0 },
{ 0x11bf0, 0x11bf9 },
{ 0x11c00, 0x11c08 },
@ -1363,6 +1370,8 @@ constexpr inline CharRange xid_continue[] = {
{ 0x11d90, 0x11d91 },
{ 0x11d93, 0x11d98 },
{ 0x11da0, 0x11da9 },
{ 0x11db0, 0x11ddb },
{ 0x11de0, 0x11de9 },
{ 0x11ee0, 0x11ef6 },
{ 0x11f00, 0x11f10 },
{ 0x11f12, 0x11f3a },
@ -1393,15 +1402,17 @@ constexpr inline CharRange xid_continue[] = {
{ 0x16d40, 0x16d6c },
{ 0x16d70, 0x16d79 },
{ 0x16e40, 0x16e7f },
{ 0x16ea0, 0x16eb8 },
{ 0x16ebb, 0x16ed3 },
{ 0x16f00, 0x16f4a },
{ 0x16f4f, 0x16f87 },
{ 0x16f8f, 0x16f9f },
{ 0x16fe0, 0x16fe1 },
{ 0x16fe3, 0x16fe4 },
{ 0x16ff0, 0x16ff1 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 },
{ 0x18cff, 0x18d08 },
{ 0x16ff0, 0x16ff6 },
{ 0x17000, 0x18cd5 },
{ 0x18cff, 0x18d1e },
{ 0x18d80, 0x18df2 },
{ 0x1aff0, 0x1aff3 },
{ 0x1aff5, 0x1affb },
{ 0x1affd, 0x1affe },
@ -1479,6 +1490,9 @@ constexpr inline CharRange xid_continue[] = {
{ 0x1e2c0, 0x1e2f9 },
{ 0x1e4d0, 0x1e4f9 },
{ 0x1e5d0, 0x1e5fa },
{ 0x1e6c0, 0x1e6de },
{ 0x1e6e0, 0x1e6f5 },
{ 0x1e6fe, 0x1e6ff },
{ 0x1e7e0, 0x1e7e6 },
{ 0x1e7e8, 0x1e7eb },
{ 0x1e7ed, 0x1e7ee },
@ -1522,14 +1536,13 @@ constexpr inline CharRange xid_continue[] = {
{ 0x1eeab, 0x1eebb },
{ 0x1fbf0, 0x1fbf9 },
{ 0x20000, 0x2a6df },
{ 0x2a700, 0x2b739 },
{ 0x2b740, 0x2b81d },
{ 0x2b820, 0x2cea1 },
{ 0x2a700, 0x2b81d },
{ 0x2b820, 0x2cead },
{ 0x2ceb0, 0x2ebe0 },
{ 0x2ebf0, 0x2ee5d },
{ 0x2f800, 0x2fa1d },
{ 0x30000, 0x3134a },
{ 0x31350, 0x323af },
{ 0x31350, 0x33479 },
{ 0xe0100, 0xe01ef },
};
@ -2138,7 +2151,10 @@ constexpr inline CharRange uppercase_letter[] = {
{ 0xa7c4, 0xa7c7 },
{ 0xa7c9, 0xa7c9 },
{ 0xa7cb, 0xa7cc },
{ 0xa7ce, 0xa7ce },
{ 0xa7d0, 0xa7d0 },
{ 0xa7d2, 0xa7d2 },
{ 0xa7d4, 0xa7d4 },
{ 0xa7d6, 0xa7d6 },
{ 0xa7d8, 0xa7d8 },
{ 0xa7da, 0xa7da },
@ -2155,6 +2171,7 @@ constexpr inline CharRange uppercase_letter[] = {
{ 0x10d50, 0x10d65 },
{ 0x118a0, 0x118bf },
{ 0x16e40, 0x16e5f },
{ 0x16ea0, 0x16eb8 },
{ 0x1d400, 0x1d419 },
{ 0x1d434, 0x1d44d },
{ 0x1d468, 0x1d481 },
@ -2340,7 +2357,7 @@ constexpr inline CharRange lowercase_letter[] = {
{ 0x24b, 0x24b },
{ 0x24d, 0x24d },
{ 0x24f, 0x293 },
{ 0x295, 0x2b8 },
{ 0x296, 0x2b8 },
{ 0x2c0, 0x2c1 },
{ 0x2e0, 0x2e4 },
{ 0x345, 0x345 },
@ -2806,13 +2823,14 @@ constexpr inline CharRange lowercase_letter[] = {
{ 0xa7c8, 0xa7c8 },
{ 0xa7ca, 0xa7ca },
{ 0xa7cd, 0xa7cd },
{ 0xa7cf, 0xa7cf },
{ 0xa7d1, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
{ 0xa7d5, 0xa7d5 },
{ 0xa7d7, 0xa7d7 },
{ 0xa7d9, 0xa7d9 },
{ 0xa7db, 0xa7db },
{ 0xa7f2, 0xa7f4 },
{ 0xa7f1, 0xa7f4 },
{ 0xa7f6, 0xa7f6 },
{ 0xa7f8, 0xa7fa },
{ 0xab30, 0xab5a },
@ -2835,6 +2853,7 @@ constexpr inline CharRange lowercase_letter[] = {
{ 0x10d70, 0x10d85 },
{ 0x118c0, 0x118df },
{ 0x16e60, 0x16e7f },
{ 0x16ebb, 0x16ed3 },
{ 0x1d41a, 0x1d433 },
{ 0x1d44e, 0x1d454 },
{ 0x1d456, 0x1d467 },
@ -2924,7 +2943,7 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x840, 0x858 },
{ 0x860, 0x86a },
{ 0x870, 0x887 },
{ 0x889, 0x88e },
{ 0x889, 0x88f },
{ 0x897, 0x897 },
{ 0x8a0, 0x8c9 },
{ 0x8d4, 0x8df },
@ -3015,7 +3034,7 @@ constexpr inline CharRange unicode_letter[] = {
{ 0xc4a, 0xc4c },
{ 0xc55, 0xc56 },
{ 0xc58, 0xc5a },
{ 0xc5d, 0xc5d },
{ 0xc5c, 0xc5d },
{ 0xc60, 0xc63 },
{ 0xc80, 0xc83 },
{ 0xc85, 0xc8c },
@ -3027,7 +3046,7 @@ constexpr inline CharRange unicode_letter[] = {
{ 0xcc6, 0xcc8 },
{ 0xcca, 0xccc },
{ 0xcd5, 0xcd6 },
{ 0xcdd, 0xcde },
{ 0xcdc, 0xcde },
{ 0xce0, 0xce3 },
{ 0xcf1, 0xcf3 },
{ 0xd00, 0xd0c },
@ -3227,11 +3246,8 @@ constexpr inline CharRange unicode_letter[] = {
{ 0xa67f, 0xa6ef },
{ 0xa717, 0xa71f },
{ 0xa722, 0xa788 },
{ 0xa78b, 0xa7cd },
{ 0xa7d0, 0xa7d1 },
{ 0xa7d3, 0xa7d3 },
{ 0xa7d5, 0xa7dc },
{ 0xa7f2, 0xa805 },
{ 0xa78b, 0xa7dc },
{ 0xa7f1, 0xa805 },
{ 0xa807, 0xa827 },
{ 0xa840, 0xa873 },
{ 0xa880, 0xa8c3 },
@ -3340,6 +3356,7 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x108f4, 0x108f5 },
{ 0x10900, 0x10915 },
{ 0x10920, 0x10939 },
{ 0x10940, 0x10959 },
{ 0x10980, 0x109b7 },
{ 0x109be, 0x109bf },
{ 0x10a00, 0x10a03 },
@ -3365,8 +3382,8 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x10e80, 0x10ea9 },
{ 0x10eab, 0x10eac },
{ 0x10eb0, 0x10eb1 },
{ 0x10ec2, 0x10ec4 },
{ 0x10efc, 0x10efc },
{ 0x10ec2, 0x10ec7 },
{ 0x10efa, 0x10efc },
{ 0x10f00, 0x10f1c },
{ 0x10f27, 0x10f27 },
{ 0x10f30, 0x10f45 },
@ -3459,6 +3476,7 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x11a50, 0x11a97 },
{ 0x11a9d, 0x11a9d },
{ 0x11ab0, 0x11af8 },
{ 0x11b60, 0x11b67 },
{ 0x11bc0, 0x11be0 },
{ 0x11c00, 0x11c08 },
{ 0x11c0a, 0x11c36 },
@ -3481,6 +3499,7 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x11d90, 0x11d91 },
{ 0x11d93, 0x11d96 },
{ 0x11d98, 0x11d98 },
{ 0x11db0, 0x11ddb },
{ 0x11ee0, 0x11ef6 },
{ 0x11f00, 0x11f10 },
{ 0x11f12, 0x11f3a },
@ -3505,15 +3524,17 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x16b7d, 0x16b8f },
{ 0x16d40, 0x16d6c },
{ 0x16e40, 0x16e7f },
{ 0x16ea0, 0x16eb8 },
{ 0x16ebb, 0x16ed3 },
{ 0x16f00, 0x16f4a },
{ 0x16f4f, 0x16f87 },
{ 0x16f8f, 0x16f9f },
{ 0x16fe0, 0x16fe1 },
{ 0x16fe3, 0x16fe3 },
{ 0x16ff0, 0x16ff1 },
{ 0x17000, 0x187f7 },
{ 0x18800, 0x18cd5 },
{ 0x18cff, 0x18d08 },
{ 0x16ff0, 0x16ff6 },
{ 0x17000, 0x18cd5 },
{ 0x18cff, 0x18d1e },
{ 0x18d80, 0x18df2 },
{ 0x1aff0, 0x1aff3 },
{ 0x1aff5, 0x1affb },
{ 0x1affd, 0x1affe },
@ -3575,6 +3596,9 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x1e4d0, 0x1e4eb },
{ 0x1e5d0, 0x1e5ed },
{ 0x1e5f0, 0x1e5f0 },
{ 0x1e6c0, 0x1e6de },
{ 0x1e6e0, 0x1e6f5 },
{ 0x1e6fe, 0x1e6ff },
{ 0x1e7e0, 0x1e7e6 },
{ 0x1e7e8, 0x1e7eb },
{ 0x1e7ed, 0x1e7ee },
@ -3620,12 +3644,11 @@ constexpr inline CharRange unicode_letter[] = {
{ 0x1f150, 0x1f169 },
{ 0x1f170, 0x1f189 },
{ 0x20000, 0x2a6df },
{ 0x2a700, 0x2b739 },
{ 0x2b740, 0x2b81d },
{ 0x2b820, 0x2cea1 },
{ 0x2a700, 0x2b81d },
{ 0x2b820, 0x2cead },
{ 0x2ceb0, 0x2ebe0 },
{ 0x2ebf0, 0x2ee5d },
{ 0x2f800, 0x2fa1d },
{ 0x30000, 0x3134a },
{ 0x31350, 0x323af },
{ 0x31350, 0x33479 },
};

View File

@ -32,8 +32,8 @@
// This file was generated using the `misc/scripts/ucaps_fetch.py` script.
#define LTU_LEN 1477
#define UTL_LEN 1460
#define LTU_LEN 1505
#define UTL_LEN 1488
static const int caps_table[LTU_LEN][2] = {
{ 0x0061, 0x0041 },
@ -1119,7 +1119,10 @@ static const int caps_table[LTU_LEN][2] = {
{ 0xA7C8, 0xA7C7 },
{ 0xA7CA, 0xA7C9 },
{ 0xA7CD, 0xA7CC },
{ 0xA7CF, 0xA7CE },
{ 0xA7D1, 0xA7D0 },
{ 0xA7D3, 0xA7D2 },
{ 0xA7D5, 0xA7D4 },
{ 0xA7D7, 0xA7D6 },
{ 0xA7D9, 0xA7D8 },
{ 0xA7DB, 0xA7DA },
@ -1479,6 +1482,31 @@ static const int caps_table[LTU_LEN][2] = {
{ 0x16E7D, 0x16E5D },
{ 0x16E7E, 0x16E5E },
{ 0x16E7F, 0x16E5F },
{ 0x16EBB, 0x16EA0 },
{ 0x16EBC, 0x16EA1 },
{ 0x16EBD, 0x16EA2 },
{ 0x16EBE, 0x16EA3 },
{ 0x16EBF, 0x16EA4 },
{ 0x16EC0, 0x16EA5 },
{ 0x16EC1, 0x16EA6 },
{ 0x16EC2, 0x16EA7 },
{ 0x16EC3, 0x16EA8 },
{ 0x16EC4, 0x16EA9 },
{ 0x16EC5, 0x16EAA },
{ 0x16EC6, 0x16EAB },
{ 0x16EC7, 0x16EAC },
{ 0x16EC8, 0x16EAD },
{ 0x16EC9, 0x16EAE },
{ 0x16ECA, 0x16EAF },
{ 0x16ECB, 0x16EB0 },
{ 0x16ECC, 0x16EB1 },
{ 0x16ECD, 0x16EB2 },
{ 0x16ECE, 0x16EB3 },
{ 0x16ECF, 0x16EB4 },
{ 0x16ED0, 0x16EB5 },
{ 0x16ED1, 0x16EB6 },
{ 0x16ED2, 0x16EB7 },
{ 0x16ED3, 0x16EB8 },
{ 0x1E922, 0x1E900 },
{ 0x1E923, 0x1E901 },
{ 0x1E924, 0x1E902 },
@ -2662,7 +2690,10 @@ static const int reverse_caps_table[UTL_LEN][2] = {
{ 0xA7C9, 0xA7CA },
{ 0xA7CB, 0x0264 },
{ 0xA7CC, 0xA7CD },
{ 0xA7CE, 0xA7CF },
{ 0xA7D0, 0xA7D1 },
{ 0xA7D2, 0xA7D3 },
{ 0xA7D4, 0xA7D5 },
{ 0xA7D6, 0xA7D7 },
{ 0xA7D8, 0xA7D9 },
{ 0xA7DA, 0xA7DB },
@ -2942,6 +2973,31 @@ static const int reverse_caps_table[UTL_LEN][2] = {
{ 0x16E5D, 0x16E7D },
{ 0x16E5E, 0x16E7E },
{ 0x16E5F, 0x16E7F },
{ 0x16EA0, 0x16EBB },
{ 0x16EA1, 0x16EBC },
{ 0x16EA2, 0x16EBD },
{ 0x16EA3, 0x16EBE },
{ 0x16EA4, 0x16EBF },
{ 0x16EA5, 0x16EC0 },
{ 0x16EA6, 0x16EC1 },
{ 0x16EA7, 0x16EC2 },
{ 0x16EA8, 0x16EC3 },
{ 0x16EA9, 0x16EC4 },
{ 0x16EAA, 0x16EC5 },
{ 0x16EAB, 0x16EC6 },
{ 0x16EAC, 0x16EC7 },
{ 0x16EAD, 0x16EC8 },
{ 0x16EAE, 0x16EC9 },
{ 0x16EAF, 0x16ECA },
{ 0x16EB0, 0x16ECB },
{ 0x16EB1, 0x16ECC },
{ 0x16EB2, 0x16ECD },
{ 0x16EB3, 0x16ECE },
{ 0x16EB4, 0x16ECF },
{ 0x16EB5, 0x16ED0 },
{ 0x16EB6, 0x16ED1 },
{ 0x16EB7, 0x16ED2 },
{ 0x16EB8, 0x16ED3 },
{ 0x1E900, 0x1E922 },
{ 0x1E901, 0x1E923 },
{ 0x1E902, 0x1E924 },

View File

@ -34,7 +34,7 @@
#define UNICODE_RANGES_INC
// Unicode Character Blocks
// Source: https://www.unicode.org/Public/16.0.0/ucd/Blocks.txt
// Source: https://www.unicode.org/Public/17.0.0/ucd/Blocks.txt
struct UniRange {
int32_t start;
@ -233,6 +233,7 @@ static UniRange unicode_ranges[] = {
{ 0x108E0, 0x108FF, U"Hatran" },
{ 0x10900, 0x1091F, U"Phoenician" },
{ 0x10920, 0x1093F, U"Lydian" },
{ 0x10940, 0x1095F, U"Sidetic" },
{ 0x10980, 0x1099F, U"Meroitic Hieroglyphs" },
{ 0x109A0, 0x109FF, U"Meroitic Cursive" },
{ 0x10A00, 0x10A5F, U"Kharoshthi" },
@ -284,11 +285,13 @@ static UniRange unicode_ranges[] = {
{ 0x11AB0, 0x11ABF, U"Unified Canadian Aboriginal Syllabics Extended-A" },
{ 0x11AC0, 0x11AFF, U"Pau Cin Hau" },
{ 0x11B00, 0x11B5F, U"Devanagari Extended-A" },
{ 0x11B60, 0x11B7F, U"Sharada Supplement" },
{ 0x11BC0, 0x11BFF, U"Sunuwar" },
{ 0x11C00, 0x11C6F, U"Bhaiksuki" },
{ 0x11C70, 0x11CBF, U"Marchen" },
{ 0x11D00, 0x11D5F, U"Masaram Gondi" },
{ 0x11D60, 0x11DAF, U"Gunjala Gondi" },
{ 0x11DB0, 0x11DEF, U"Tolong Siki" },
{ 0x11EE0, 0x11EFF, U"Makasar" },
{ 0x11F00, 0x11F5F, U"Kawi" },
{ 0x11FB0, 0x11FBF, U"Lisu Supplement" },
@ -308,12 +311,14 @@ static UniRange unicode_ranges[] = {
{ 0x16B00, 0x16B8F, U"Pahawh Hmong" },
{ 0x16D40, 0x16D7F, U"Kirat Rai" },
{ 0x16E40, 0x16E9F, U"Medefaidrin" },
{ 0x16EA0, 0x16EDF, U"Beria Erfe" },
{ 0x16F00, 0x16F9F, U"Miao" },
{ 0x16FE0, 0x16FFF, U"Ideographic Symbols and Punctuation" },
{ 0x17000, 0x187FF, U"Tangut" },
{ 0x18800, 0x18AFF, U"Tangut Components" },
{ 0x18B00, 0x18CFF, U"Khitan Small Script" },
{ 0x18D00, 0x18D7F, U"Tangut Supplement" },
{ 0x18D80, 0x18DFF, U"Tangut Components Supplement" },
{ 0x1AFF0, 0x1AFFF, U"Kana Extended-B" },
{ 0x1B000, 0x1B0FF, U"Kana Supplement" },
{ 0x1B100, 0x1B12F, U"Kana Extended-A" },
@ -322,6 +327,7 @@ static UniRange unicode_ranges[] = {
{ 0x1BC00, 0x1BC9F, U"Duployan" },
{ 0x1BCA0, 0x1BCAF, U"Shorthand Format Controls" },
{ 0x1CC00, 0x1CEBF, U"Symbols for Legacy Computing Supplement" },
{ 0x1CEC0, 0x1CEFF, U"Miscellaneous Symbols Supplement" },
{ 0x1CF00, 0x1CFCF, U"Znamenny Musical Notation" },
{ 0x1D000, 0x1D0FF, U"Byzantine Musical Symbols" },
{ 0x1D100, 0x1D1FF, U"Musical Symbols" },
@ -340,6 +346,7 @@ static UniRange unicode_ranges[] = {
{ 0x1E2C0, 0x1E2FF, U"Wancho" },
{ 0x1E4D0, 0x1E4FF, U"Nag Mundari" },
{ 0x1E5D0, 0x1E5FF, U"Ol Onal" },
{ 0x1E6C0, 0x1E6FF, U"Tai Yo" },
{ 0x1E7E0, 0x1E7FF, U"Ethiopic Extended-B" },
{ 0x1E800, 0x1E8DF, U"Mende Kikakui" },
{ 0x1E900, 0x1E95F, U"Adlam" },
@ -371,6 +378,7 @@ static UniRange unicode_ranges[] = {
{ 0x2F800, 0x2FA1F, U"CJK Compatibility Ideographs Supplement" },
{ 0x30000, 0x3134F, U"CJK Unified Ideographs Extension G" },
{ 0x31350, 0x323AF, U"CJK Unified Ideographs Extension H" },
{ 0x323B0, 0x3347F, U"CJK Unified Ideographs Extension J" },
{ 0xF0000, 0xFFFFF, U"Supplementary Private Use Area-A" },
{ 0x100000, 0x10FFFF, U"Supplementary Private Use Area-B" },
{ 0x10FFFF, 0x10FFFF, String() }

View File

@ -16,7 +16,7 @@ if __name__ == "__main__":
from methods import generate_copyright_header
URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt"
URL: Final[str] = "https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt"
xid_start: list[tuple[int, int]] = []

View File

@ -16,7 +16,7 @@ if __name__ == "__main__":
from methods import generate_copyright_header
URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/UnicodeData.txt"
URL: Final[str] = "https://www.unicode.org/Public/17.0.0/ucd/UnicodeData.txt"
lower_to_upper: list[tuple[str, str]] = []

View File

@ -16,7 +16,7 @@ if __name__ == "__main__":
from methods import generate_copyright_header
URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/Blocks.txt"
URL: Final[str] = "https://www.unicode.org/Public/17.0.0/ucd/Blocks.txt"
ranges: list[tuple[str, str, str]] = []