From 06cba6054b201f8590879f289a5b16cb819af2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pa=CC=84vels=20Nadtoc=CC=8Cajevs?= <7645683+bruvzg@users.noreply.github.com> Date: Sun, 12 Jan 2025 22:55:38 +0200 Subject: [PATCH] Add plural form support for Translation, OptimizedTranslation and CSV translation import. --- core/string/optimized_translation.cpp | 158 +++++++++---- core/string/optimized_translation.h | 7 +- core/string/translation.cpp | 217 ++++++++++++++++-- core/string/translation.h | 35 ++- core/string/translation_po.cpp | 118 ---------- core/string/translation_po.h | 26 --- doc/classes/Translation.xml | 25 ++ .../resource_importer_csv_translation.cpp | 19 +- tests/core/string/test_translation.h | 35 ++- tests/data/translations.csv | 9 +- 10 files changed, 434 insertions(+), 215 deletions(-) diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp index 71be8524cf1..a6f67aadffc 100644 --- a/core/string/optimized_translation.cpp +++ b/core/string/optimized_translation.cpp @@ -52,7 +52,13 @@ void OptimizedTranslation::generate(const Ref &p_from) { int size = Math::larger_prime(keys.size()); - Vector>> buckets; + struct TrItem { + uint32_t idx = 0; + CharString cs; + uint32_t plural = 0; + }; + + Vector> buckets; Vector> table; Vector hfunc_table; Vector compressed; @@ -60,54 +66,60 @@ void OptimizedTranslation::generate(const Ref &p_from) { table.resize(size); hfunc_table.resize(size); buckets.resize(size); - compressed.resize(keys.size()); int idx = 0; int total_compression_size = 0; + set_plural_rule(p_from->get_plural_rule()); + for (const StringName &E : keys) { - //hash string - CharString cs = E.operator String().utf8(); - uint32_t h = hash(0, cs.get_data()); - Pair p; - p.first = idx; - p.second = cs; - buckets.write[h % size].push_back(p); + Vector srcs = p_from->get_plural_messages(E); + for (int n = 0; n < srcs.size(); n++) { + //hash string + CharString cs = E.operator String().utf8(); + uint32_t h = hash(0, 
cs.get_data(), n); + TrItem p; + p.idx = idx; + p.cs = cs; + p.plural = n; + buckets.write[h % size].push_back(p); - //compress string - CharString src_s = p_from->get_message(E).operator String().utf8(); - CompressedString ps; - ps.orig_len = src_s.size(); - ps.offset = total_compression_size; + //compress string + CharString src_s = srcs[n].utf8(); - if (ps.orig_len != 0) { - CharString dst_s; - dst_s.resize(src_s.size()); - int ret = smaz_compress(src_s.get_data(), src_s.size(), dst_s.ptrw(), src_s.size()); - if (ret >= src_s.size()) { - //if compressed is larger than original, just use original - ps.orig_len = src_s.size(); - ps.compressed = src_s; + CompressedString ps; + ps.orig_len = src_s.size(); + ps.offset = total_compression_size; + + if (ps.orig_len != 0) { + CharString dst_s; + dst_s.resize(src_s.size()); + int ret = smaz_compress(src_s.get_data(), src_s.size(), dst_s.ptrw(), src_s.size()); + if (ret >= src_s.size()) { + //if compressed is larger than original, just use original + ps.orig_len = src_s.size(); + ps.compressed = src_s; + } else { + dst_s.resize(ret); + //ps.orig_len=; + ps.compressed = dst_s; + } } else { - dst_s.resize(ret); - //ps.orig_len=; - ps.compressed = dst_s; + ps.orig_len = 1; + ps.compressed.resize(1); + ps.compressed[0] = 0; } - } else { - ps.orig_len = 1; - ps.compressed.resize(1); - ps.compressed[0] = 0; - } - compressed.write[idx] = ps; - total_compression_size += ps.compressed.size(); - idx++; + compressed.push_back(ps); + total_compression_size += ps.compressed.size(); + idx++; + } } int bucket_table_size = 0; for (int i = 0; i < size; i++) { - const Vector> &b = buckets[i]; + const Vector &b = buckets[i]; HashMap &t = table.write[i]; if (b.size() == 0) { @@ -118,13 +130,13 @@ void OptimizedTranslation::generate(const Ref &p_from) { int item = 0; while (item < b.size()) { - uint32_t slot = hash(d, b[item].second.get_data()); + uint32_t slot = hash(d, b[item].cs.get_data(), b[item].plural); if (t.has(slot)) { item = 0; 
d++; t.clear(); } else { - t[slot] = b[item].first; + t[slot] = b[item].idx; item++; } } @@ -188,6 +200,8 @@ bool OptimizedTranslation::_set(const StringName &p_name, const Variant &p_value strings = p_value; } else if (prop_name == "load_from") { generate(p_value); + } else if (prop_name == "plural_rule") { + set_plural_rule(p_value); } else { return false; } @@ -203,6 +217,8 @@ bool OptimizedTranslation::_get(const StringName &p_name, Variant &r_ret) const r_ret = bucket_table; } else if (prop_name == "strings") { r_ret = strings; + } else if (prop_name == "plural_rule") { + r_ret = get_plural_rule(); } else { return false; } @@ -220,7 +236,7 @@ StringName OptimizedTranslation::get_message(const StringName &p_src_text, const } CharString str = p_src_text.operator String().utf8(); - uint32_t h = hash(0, str.get_data()); + uint32_t h = hash(0, str.get_data(), 0); const int *htr = hash_table.ptr(); const uint32_t *htptr = (const uint32_t *)&htr[0]; @@ -237,7 +253,7 @@ StringName OptimizedTranslation::get_message(const StringName &p_src_text, const const Bucket &bucket = *(const Bucket *)&btptr[p]; - h = hash(bucket.func, str.get_data()); + h = hash(bucket.func, str.get_data(), 0); int idx = -1; @@ -301,14 +317,76 @@ Vector OptimizedTranslation::get_translated_message_list() const { } StringName OptimizedTranslation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { - // The use of plurals translation is not yet supported in OptimizedTranslation. - return get_message(p_src_text, p_context); + ERR_FAIL_COND_V_MSG(p_n < 0, StringName(), "N passed into translation to get a plural message should not be negative. For negative numbers, use singular translation please. Search \"gettext PO Plural Forms\" online for the documentation on translating negative numbers."); + // p_context passed in is ignore. The use of context is not yet supported in OptimizedTranslation. 
+ + int plural_index = (p_n == last_plural_n && p_src_text == last_plural_key) ? last_plural_mapped_index : _get_plural_index(p_n); + + int htsize = hash_table.size(); + + if (htsize == 0) { + return StringName(); + } + + CharString str = p_src_text.operator String().utf8(); + uint32_t h = hash(0, str.get_data(), plural_index); + + const int *htr = hash_table.ptr(); + const uint32_t *htptr = (const uint32_t *)&htr[0]; + const int *btr = bucket_table.ptr(); + const uint32_t *btptr = (const uint32_t *)&btr[0]; + const uint8_t *sr = strings.ptr(); + const char *sptr = (const char *)&sr[0]; + + uint32_t p = htptr[h % htsize]; + + if (p == 0xFFFFFFFF) { + return StringName(); //nothing + } + + const Bucket &bucket = *(const Bucket *)&btptr[p]; + + h = hash(bucket.func, str.get_data(), plural_index); + + int idx = -1; + + for (int i = 0; i < bucket.size; i++) { + if (bucket.elem[i].key == h) { + idx = i; + break; + } + } + + if (idx == -1) { + return StringName(); + } + + // Cache result so that if the next entry is the same, we can return directly. 
+ // _get_plural_index(p_n) can get very costly, especially when evaluating long plural-rule (Arabic) + last_plural_key = p_src_text; + last_plural_n = p_n; + last_plural_mapped_index = plural_index; + + if (bucket.elem[idx].comp_size == bucket.elem[idx].uncomp_size) { + String rstr; + rstr.parse_utf8(&sptr[bucket.elem[idx].str_offset], bucket.elem[idx].uncomp_size); + + return rstr; + } else { + CharString uncomp; + uncomp.resize(bucket.elem[idx].uncomp_size + 1); + smaz_decompress(&sptr[bucket.elem[idx].str_offset], bucket.elem[idx].comp_size, uncomp.ptrw(), bucket.elem[idx].uncomp_size); + String rstr; + rstr.parse_utf8(uncomp.get_data()); + return rstr; + } } void OptimizedTranslation::_get_property_list(List *p_list) const { p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table")); p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table")); p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings")); + p_list->push_back(PropertyInfo(Variant::STRING, "plural_rule")); p_list->push_back(PropertyInfo(Variant::OBJECT, "load_from", PROPERTY_HINT_RESOURCE_TYPE, "Translation", PROPERTY_USAGE_EDITOR)); } diff --git a/core/string/optimized_translation.h b/core/string/optimized_translation.h index 3992cc5149b..ae37689be14 100644 --- a/core/string/optimized_translation.h +++ b/core/string/optimized_translation.h @@ -60,7 +60,7 @@ class OptimizedTranslation : public Translation { Elem elem[1]; }; - _FORCE_INLINE_ uint32_t hash(uint32_t d, const char *p_str) const { + _FORCE_INLINE_ uint32_t hash(uint32_t d, const char *p_str, uint32_t p_n) const { if (d == 0) { d = 0x1000193; } @@ -68,10 +68,15 @@ class OptimizedTranslation : public Translation { d = (d * 0x1000193) ^ uint32_t(*p_str); p_str++; } + d = (d * 0x1000193) ^ p_n; return d; } + mutable StringName last_plural_key; + mutable int last_plural_n = -1; // Set it to an impossible value at the beginning. 
+ mutable int last_plural_mapped_index = 0; + protected: bool _set(const StringName &p_name, const Variant &p_value); bool _get(const StringName &p_name, Variant &r_ret) const; diff --git a/core/string/translation.cpp b/core/string/translation.cpp index d944135a709..a0ba9bf7162 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -34,19 +34,148 @@ #include "core/os/thread.h" #include "core/string/translation_server.h" +int Translation::_get_plural_index(int p_n) const { + // Get a number between [0;number of plural forms). + + input_val.clear(); + input_val.push_back(p_n); + + return _eq_test(equi_tests, 0); +} + +int Translation::_eq_test(const Ref &p_node, const Variant &p_result) const { + if (p_node.is_valid()) { + Error err = expr->parse(p_node->regex, input_name); + ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". Error: %s", p_node->regex, expr->get_error_text())); + + Variant result = expr->execute(input_val); + ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex)); + + if (bool(result)) { + return _eq_test(p_node->left, result); + } else { + return _eq_test(p_node->right, result); + } + } else { + return p_result; + } +} + +int Translation::_find_unquoted(const String &p_src, char32_t p_chr) const { + const int len = p_src.length(); + if (len == 0) { + return -1; + } + + const char32_t *src = p_src.get_data(); + bool in_quote = false; + for (int i = 0; i < len; i++) { + if (in_quote) { + if (src[i] == ')') { + in_quote = false; + } + } else { + if (src[i] == '(') { + in_quote = true; + } else if (src[i] == p_chr) { + return i; + } + } + } + + return -1; +} + +void Translation::_cache_plural_tests(const String &p_plural_rule, Ref &p_node) { + // Some examples of p_plural_rule passed in can have the form: + // "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 
4 : 5" (Arabic) + // "n >= 2" (French) // When evaluating the last, especially careful with this one. + // "n != 1" (English) + + String rule = p_plural_rule; + if (rule.begins_with("(") && rule.ends_with(")")) { + int bcount = 0; + for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) { + if (rule[i] == '(') { + bcount++; + } else if (rule[i] == ')') { + bcount--; + } + } + if (bcount == 0) { + rule = rule.substr(1, rule.length() - 2); + } + } + + int first_ques_mark = _find_unquoted(rule, '?'); + int first_colon = _find_unquoted(rule, ':'); + + if (first_ques_mark == -1) { + p_node->regex = rule.strip_edges(); + return; + } + + p_node->regex = rule.substr(0, first_ques_mark).strip_edges(); + + p_node->left.instantiate(); + _cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left); + p_node->right.instantiate(); + _cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right); +} + +void Translation::set_plural_rule(const String &p_plural_rule) { + // Set plural_forms and plural_rule. + // p_plural_rule passed in has the form "Plural-Forms: nplurals=2; plural=(n >= 2);". + + int first_semi_col = p_plural_rule.find_char(';'); + plural_forms = p_plural_rule.substr(p_plural_rule.find_char('=') + 1, first_semi_col - (p_plural_rule.find_char('=') + 1)).to_int(); + + int expression_start = p_plural_rule.find_char('=', first_semi_col) + 1; + int second_semi_col = p_plural_rule.rfind_char(';'); + plural_rule = p_plural_rule.substr(expression_start, second_semi_col - expression_start).strip_edges(); + + // Setup the cache to make evaluating plural rule faster later on. 
+ equi_tests.instantiate(); + _cache_plural_tests(plural_rule, equi_tests); + + expr.instantiate(); + input_name.push_back("n"); +} + +int Translation::get_plural_forms() const { + return plural_forms; +} + +String Translation::get_plural_rule() const { + return plural_rule; +} + Dictionary Translation::_get_messages() const { Dictionary d; - for (const KeyValue &E : translation_map) { + for (const KeyValue> &E : translation_map) { d[E.key] = E.value; } return d; } +void Translation::_set_messages(const Dictionary &p_messages) { + List keys; + p_messages.get_key_list(&keys); + for (const Variant &E : keys) { + if (p_messages[E].get_type() == Variant::STRING || p_messages[E].get_type() == Variant::STRING_NAME) { + PackedStringArray arr = { p_messages[E].operator String() }; + translation_map[E] = arr; + } else if (p_messages[E].get_type() == Variant::PACKED_STRING_ARRAY) { + translation_map[E] = p_messages[E]; + } + } +} + Vector Translation::_get_message_list() const { Vector msgs; msgs.resize(translation_map.size()); int idx = 0; - for (const KeyValue &E : translation_map) { + for (const KeyValue> &E : translation_map) { msgs.set(idx, E.key); idx += 1; } @@ -56,24 +185,15 @@ Vector Translation::_get_message_list() const { Vector Translation::get_translated_message_list() const { Vector msgs; - msgs.resize(translation_map.size()); - int idx = 0; - for (const KeyValue &E : translation_map) { - msgs.set(idx, E.value); - idx += 1; + for (const KeyValue> &E : translation_map) { + for (const String &F : E.value) { + msgs.push_back(F); + } } return msgs; } -void Translation::_set_messages(const Dictionary &p_messages) { - List keys; - p_messages.get_key_list(&keys); - for (const Variant &E : keys) { - translation_map[E] = p_messages[E]; - } -} - void Translation::set_locale(const String &p_locale) { locale = TranslationServer::get_singleton()->standardize_locale(p_locale); @@ -94,13 +214,13 @@ void Translation::_notify_translation_changed_if_applies() { } void 
Translation::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) { - translation_map[p_src_text] = p_xlated_text; + Vector arr = { p_xlated_text }; + translation_map[p_src_text] = arr; } void Translation::add_plural_message(const StringName &p_src_text, const Vector &p_plural_xlated_texts, const StringName &p_context) { - WARN_PRINT("Translation class doesn't handle plural messages. Calling add_plural_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles plurals, such as TranslationPO class"); ERR_FAIL_COND_MSG(p_plural_xlated_texts.is_empty(), "Parameter vector p_plural_xlated_texts passed in is empty."); - translation_map[p_src_text] = p_plural_xlated_texts[0]; + translation_map[p_src_text] = p_plural_xlated_texts; } StringName Translation::get_message(const StringName &p_src_text, const StringName &p_context) const { @@ -113,22 +233,63 @@ StringName Translation::get_message(const StringName &p_src_text, const StringNa WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class"); } - HashMap::ConstIterator E = translation_map.find(p_src_text); + HashMap>::ConstIterator E = translation_map.find(p_src_text); if (!E) { return StringName(); } - return E->value; + return E->value[0]; } StringName Translation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { + ERR_FAIL_COND_V_MSG(p_n < 0, StringName(), "N passed into translation to get a plural message should not be negative. For negative numbers, use singular translation please. 
Search \"gettext PO Plural Forms\" online for the documentation on translating negative numbers."); + StringName ret; if (GDVIRTUAL_CALL(_get_plural_message, p_src_text, p_plural_text, p_n, p_context, ret)) { return ret; } - WARN_PRINT("Translation class doesn't handle plural messages. Calling get_plural_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles plurals, such as TranslationPO class"); - return get_message(p_src_text); + if (p_context != StringName()) { + WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class"); + } + + HashMap>::ConstIterator E = translation_map.find(p_src_text); + if (!E) { + return StringName(); + } + + // If the query is the same as last time, reuse the cached plural index instead of + // re-evaluating the plural rule. The entry is still looked up and the index is still + // range-checked below, because the message may have been erased or replaced with + // fewer plural forms since the index was cached. + int plural_index = (p_n == last_plural_n && p_src_text == last_plural_key) ? last_plural_mapped_index : _get_plural_index(p_n); + ERR_FAIL_COND_V_MSG(plural_index < 0 || plural_index >= E->value.size(), StringName(), "Plural index returned or number of plural translations is not valid. Please report this bug."); + + // Cache result so that if the next entry is the same, we can return directly. + // _get_plural_index(p_n) can get very costly, especially when evaluating long plural-rule (Arabic) + last_plural_key = p_src_text; + last_plural_n = p_n; + last_plural_mapped_index = plural_index; + + return E->value[plural_index]; +} + +Vector Translation::get_plural_messages(const StringName &p_src_text, const StringName &p_context) const { + Vector ret; + if (GDVIRTUAL_CALL(_get_plural_messages, p_src_text, p_context, ret)) { + return ret; + } + + if (p_context != StringName()) { + WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake.
\nUse a derived Translation class that handles context, such as TranslationPO class"); + } + + HashMap>::ConstIterator E = translation_map.find(p_src_text); + if (!E) { + return Vector(); + } + + return E->value; } void Translation::erase_message(const StringName &p_src_text, const StringName &p_context) { @@ -136,11 +297,13 @@ void Translation::erase_message(const StringName &p_src_text, const StringName & WARN_PRINT("Translation class doesn't handle context. Using context in erase_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class"); } - translation_map.erase(p_src_text); + if (translation_map.has(p_src_text)) { + translation_map.erase(p_src_text); + } } void Translation::get_message_list(List *r_messages) const { - for (const KeyValue &E : translation_map) { + for (const KeyValue> &E : translation_map) { r_messages->push_back(E.key); } } @@ -156,16 +319,22 @@ void Translation::_bind_methods() { ClassDB::bind_method(D_METHOD("add_plural_message", "src_message", "xlated_messages", "context"), &Translation::add_plural_message, DEFVAL(StringName())); ClassDB::bind_method(D_METHOD("get_message", "src_message", "context"), &Translation::get_message, DEFVAL(StringName())); ClassDB::bind_method(D_METHOD("get_plural_message", "src_message", "src_plural_message", "n", "context"), &Translation::get_plural_message, DEFVAL(StringName())); + ClassDB::bind_method(D_METHOD("get_plural_messages", "src_message", "context"), &Translation::get_plural_messages, DEFVAL(StringName())); ClassDB::bind_method(D_METHOD("erase_message", "src_message", "context"), &Translation::erase_message, DEFVAL(StringName())); ClassDB::bind_method(D_METHOD("get_message_list"), &Translation::_get_message_list); ClassDB::bind_method(D_METHOD("get_translated_message_list"), &Translation::get_translated_message_list); ClassDB::bind_method(D_METHOD("get_message_count"), &Translation::get_message_count); 
ClassDB::bind_method(D_METHOD("_set_messages", "messages"), &Translation::_set_messages); ClassDB::bind_method(D_METHOD("_get_messages"), &Translation::_get_messages); + ClassDB::bind_method(D_METHOD("get_plural_forms"), &Translation::get_plural_forms); + ClassDB::bind_method(D_METHOD("set_plural_rule", "rule"), &Translation::set_plural_rule); + ClassDB::bind_method(D_METHOD("get_plural_rule"), &Translation::get_plural_rule); + GDVIRTUAL_BIND(_get_plural_messages, "src_message", "context"); GDVIRTUAL_BIND(_get_plural_message, "src_message", "src_plural_message", "n", "context"); GDVIRTUAL_BIND(_get_message, "src_message", "context"); ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages"); ADD_PROPERTY(PropertyInfo(Variant::STRING, "locale"), "set_locale", "get_locale"); + ADD_PROPERTY(PropertyInfo(Variant::STRING, "plural_rule"), "set_plural_rule", "get_plural_rule"); } diff --git a/core/string/translation.h b/core/string/translation.h index 4e8cffc90cf..f7cd0a4baea 100644 --- a/core/string/translation.h +++ b/core/string/translation.h @@ -32,6 +32,7 @@ #define TRANSLATION_H #include "core/io/resource.h" +#include "core/math/expression.h" #include "core/object/gdvirtual.gen.inc" class Translation : public Resource { @@ -40,7 +41,11 @@ class Translation : public Resource { RES_BASE_EXTENSION("translation"); String locale = "en"; - HashMap translation_map; + HashMap> translation_map; + + mutable StringName last_plural_key; + mutable int last_plural_n = -1; // Set it to an impossible value at the beginning. + mutable int last_plural_mapped_index = 0; virtual Vector _get_message_list() const; virtual Dictionary _get_messages() const; @@ -49,10 +54,33 @@ class Translation : public Resource { void _notify_translation_changed_if_applies(); protected: + int plural_forms = 0; // 0 means no "Plural-Forms" is given in the PO header file. 
The min for all languages is 1. + String plural_rule; + + // Cache temporary variables related to _get_plural_index() to make it faster + class EQNode : public RefCounted { + public: + String regex; + Ref left; + Ref right; + }; + Ref equi_tests; + + int _find_unquoted(const String &p_src, char32_t p_chr) const; + int _eq_test(const Ref &p_node, const Variant &p_result) const; + + Vector input_name; + mutable Ref expr; + mutable Array input_val; + + void _cache_plural_tests(const String &p_plural_rule, Ref &p_node); + int _get_plural_index(int p_n) const; + static void _bind_methods(); GDVIRTUAL2RC(StringName, _get_message, StringName, StringName); GDVIRTUAL4RC(StringName, _get_plural_message, StringName, StringName, int, StringName); + GDVIRTUAL2RC(Vector, _get_plural_messages, StringName, StringName); public: void set_locale(const String &p_locale); @@ -62,11 +90,16 @@ public: virtual void add_plural_message(const StringName &p_src_text, const Vector &p_plural_xlated_texts, const StringName &p_context = ""); virtual StringName get_message(const StringName &p_src_text, const StringName &p_context = "") const; //overridable for other implementations virtual StringName get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context = "") const; + virtual Vector get_plural_messages(const StringName &p_src_text, const StringName &p_context = "") const; virtual void erase_message(const StringName &p_src_text, const StringName &p_context = ""); virtual void get_message_list(List *r_messages) const; virtual int get_message_count() const; virtual Vector get_translated_message_list() const; + virtual void set_plural_rule(const String &p_plural_rule); + virtual int get_plural_forms() const; + virtual String get_plural_rule() const; + Translation() {} }; diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp index 7eb8a2afeba..1ecbfa3be22 100644 --- a/core/string/translation_po.cpp +++ 
b/core/string/translation_po.cpp @@ -134,114 +134,6 @@ Vector TranslationPO::_get_message_list() const { return v; } -int TranslationPO::_get_plural_index(int p_n) const { - // Get a number between [0;number of plural forms). - - input_val.clear(); - input_val.push_back(p_n); - - return _eq_test(equi_tests, 0); -} - -int TranslationPO::_eq_test(const Ref &p_node, const Variant &p_result) const { - if (p_node.is_valid()) { - Error err = expr->parse(p_node->regex, input_name); - ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". Error: %s", p_node->regex, expr->get_error_text())); - - Variant result = expr->execute(input_val); - ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex)); - - if (bool(result)) { - return _eq_test(p_node->left, result); - } else { - return _eq_test(p_node->right, result); - } - } else { - return p_result; - } -} - -int TranslationPO::_find_unquoted(const String &p_src, char32_t p_chr) const { - const int len = p_src.length(); - if (len == 0) { - return -1; - } - - const char32_t *src = p_src.get_data(); - bool in_quote = false; - for (int i = 0; i < len; i++) { - if (in_quote) { - if (src[i] == ')') { - in_quote = false; - } - } else { - if (src[i] == '(') { - in_quote = true; - } else if (src[i] == p_chr) { - return i; - } - } - } - - return -1; -} - -void TranslationPO::_cache_plural_tests(const String &p_plural_rule, Ref &p_node) { - // Some examples of p_plural_rule passed in can have the form: - // "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic) - // "n >= 2" (French) // When evaluating the last, especially careful with this one. 
- // "n != 1" (English) - - String rule = p_plural_rule; - if (rule.begins_with("(") && rule.ends_with(")")) { - int bcount = 0; - for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) { - if (rule[i] == '(') { - bcount++; - } else if (rule[i] == ')') { - bcount--; - } - } - if (bcount == 0) { - rule = rule.substr(1, rule.length() - 2); - } - } - - int first_ques_mark = _find_unquoted(rule, '?'); - int first_colon = _find_unquoted(rule, ':'); - - if (first_ques_mark == -1) { - p_node->regex = rule.strip_edges(); - return; - } - - p_node->regex = rule.substr(0, first_ques_mark).strip_edges(); - - p_node->left.instantiate(); - _cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left); - p_node->right.instantiate(); - _cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right); -} - -void TranslationPO::set_plural_rule(const String &p_plural_rule) { - // Set plural_forms and plural_rule. - // p_plural_rule passed in has the form "Plural-Forms: nplurals=2; plural=(n >= 2);". - - int first_semi_col = p_plural_rule.find_char(';'); - plural_forms = p_plural_rule.substr(p_plural_rule.find_char('=') + 1, first_semi_col - (p_plural_rule.find_char('=') + 1)).to_int(); - - int expression_start = p_plural_rule.find_char('=', first_semi_col) + 1; - int second_semi_col = p_plural_rule.rfind_char(';'); - plural_rule = p_plural_rule.substr(expression_start, second_semi_col - expression_start).strip_edges(); - - // Setup the cache to make evaluating plural rule faster later on. 
- equi_tests.instantiate(); - _cache_plural_tests(plural_rule, equi_tests); - - expr.instantiate(); - input_name.push_back("n"); -} - void TranslationPO::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) { HashMap> &map_id_str = translation_map[p_context]; @@ -268,14 +160,6 @@ void TranslationPO::add_plural_message(const StringName &p_src_text, const Vecto } } -int TranslationPO::get_plural_forms() const { - return plural_forms; -} - -String TranslationPO::get_plural_rule() const { - return plural_rule; -} - StringName TranslationPO::get_message(const StringName &p_src_text, const StringName &p_context) const { if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) { return StringName(); @@ -345,6 +229,4 @@ int TranslationPO::get_message_count() const { } void TranslationPO::_bind_methods() { - ClassDB::bind_method(D_METHOD("get_plural_forms"), &TranslationPO::get_plural_forms); - ClassDB::bind_method(D_METHOD("get_plural_rule"), &TranslationPO::get_plural_rule); } diff --git a/core/string/translation_po.h b/core/string/translation_po.h index ba820c6ee49..945a2599103 100644 --- a/core/string/translation_po.h +++ b/core/string/translation_po.h @@ -33,7 +33,6 @@ //#define DEBUG_TRANSLATION_PO -#include "core/math/expression.h" #include "core/string/translation.h" class TranslationPO : public Translation { @@ -46,32 +45,11 @@ class TranslationPO : public Translation { // Strings without context have "" as first key. HashMap>> translation_map; - int plural_forms = 0; // 0 means no "Plural-Forms" is given in the PO header file. The min for all languages is 1. 
- String plural_rule; - - // Cache temporary variables related to _get_plural_index() to make it faster - class EQNode : public RefCounted { - public: - String regex; - Ref left; - Ref right; - }; - Ref equi_tests; - - int _find_unquoted(const String &p_src, char32_t p_chr) const; - int _eq_test(const Ref &p_node, const Variant &p_result) const; - - Vector input_name; - mutable Ref expr; - mutable Array input_val; mutable StringName last_plural_key; mutable StringName last_plural_context; mutable int last_plural_n = -1; // Set it to an impossible value at the beginning. mutable int last_plural_mapped_index = 0; - void _cache_plural_tests(const String &p_plural_rule, Ref &p_node); - int _get_plural_index(int p_n) const; - Vector _get_message_list() const override; Dictionary _get_messages() const override; void _set_messages(const Dictionary &p_messages) override; @@ -89,10 +67,6 @@ public: StringName get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context = "") const override; void erase_message(const StringName &p_src_text, const StringName &p_context = "") override; - void set_plural_rule(const String &p_plural_rule); - int get_plural_forms() const; - String get_plural_rule() const; - #ifdef DEBUG_TRANSLATION_PO void print_translation_map(); #endif diff --git a/doc/classes/Translation.xml b/doc/classes/Translation.xml index cc63247266f..4fe4bc0a96f 100644 --- a/doc/classes/Translation.xml +++ b/doc/classes/Translation.xml @@ -29,6 +29,14 @@ Virtual method to override [method get_plural_message]. + + + + + + Virtual method to override [method get_plural_messages]. + + @@ -77,6 +85,12 @@ Returns all the messages (keys). + + + + Returns number of plural forms. + + @@ -88,6 +102,14 @@ The number [param n] is the number or quantity of the plural object. It will be used to guide the translation system to fetch the correct plural form for the selected language. 
+ + + + + + Returns an array of message's translation plural forms. + + @@ -99,5 +121,8 @@ The locale of the translation. + + The plural forms rule string. + diff --git a/editor/import/resource_importer_csv_translation.cpp b/editor/import/resource_importer_csv_translation.cpp index 17f6070d351..c2f4892a911 100644 --- a/editor/import/resource_importer_csv_translation.cpp +++ b/editor/import/resource_importer_csv_translation.cpp @@ -96,6 +96,7 @@ Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const Vector locales; Vector> translations; + Vector>> messages; HashSet skipped_locales; for (int i = 1; i < line.size(); i++) { @@ -114,6 +115,7 @@ Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const translation.instantiate(); translation->set_locale(locale); translations.push_back(translation); + messages.push_back(HashMap>()); } do { @@ -127,13 +129,28 @@ Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const if (skipped_locales.has(i)) { continue; } - translations.write[write_index++]->add_message(key, line[i].c_unescape()); + if (key.to_lower() == "_pluralrule") { + translations.write[write_index++]->set_plural_rule(line[i].c_unescape()); + continue; + } + if (line[i].is_empty()) { + write_index++; + } else { + messages.write[write_index++][key].push_back(line[i].c_unescape()); + } } } } while (!f->eof_reached()); for (int i = 0; i < translations.size(); i++) { Ref xlt = translations[i]; + for (const KeyValue> &E : messages[i]) { + if (E.value.size() == 1) { + xlt->add_message(E.key, E.value[0]); + } else if (E.value.size() > 1) { + xlt->add_plural_message(E.key, E.value); + } + } if (compress) { Ref cxl = memnew(OptimizedTranslation); diff --git a/tests/core/string/test_translation.h b/tests/core/string/test_translation.h index 2b1069d40c4..55d5e6b5824 100644 --- a/tests/core/string/test_translation.h +++ b/tests/core/string/test_translation.h @@ -138,6 +138,22 @@ 
TEST_CASE("[OptimizedTranslation] Generate from Translation and read messages") translation->add_message("Hello2", "Bonjour2"); translation->add_message("Hello3", "Bonjour3"); + translation->set_plural_rule("nplurals=2; plural=(n >= 2);"); + CHECK(translation->get_plural_forms() == 2); + + PackedStringArray plurals; + plurals.push_back("Il y a %d pomme"); + plurals.push_back("Il y a %d pommes"); + translation->add_plural_message("There are %d apples", plurals); + + ERR_PRINT_OFF; + // This is invalid, as the number passed to `get_plural_message()` may not be negative. + CHECK(vformat(translation->get_plural_message("There are %d apples", "", -1), -1) == ""); + ERR_PRINT_ON; + CHECK(vformat(translation->get_plural_message("There are %d apples", "", 0), 0) == "Il y a 0 pomme"); + CHECK(vformat(translation->get_plural_message("There are %d apples", "", 1), 1) == "Il y a 1 pomme"); + CHECK(vformat(translation->get_plural_message("There are %d apples", "", 2), 2) == "Il y a 2 pommes"); + Ref optimized_translation = memnew(OptimizedTranslation); optimized_translation->generate(translation); CHECK(optimized_translation->get_message("Hello") == "Bonjour"); @@ -145,6 +161,14 @@ TEST_CASE("[OptimizedTranslation] Generate from Translation and read messages") CHECK(optimized_translation->get_message("Hello3") == "Bonjour3"); CHECK(optimized_translation->get_message("DoesNotExist") == ""); + ERR_PRINT_OFF; + // This is invalid, as the number passed to `get_plural_message()` may not be negative. 
+ CHECK(vformat(optimized_translation->get_plural_message("There are %d apples", "", -1), -1) == ""); + ERR_PRINT_ON; + CHECK(vformat(optimized_translation->get_plural_message("There are %d apples", "", 0), 0) == "Il y a 0 pomme"); + CHECK(vformat(optimized_translation->get_plural_message("There are %d apples", "", 1), 1) == "Il y a 1 pomme"); + CHECK(vformat(optimized_translation->get_plural_message("There are %d apples", "", 2), 2) == "Il y a 2 pommes"); + List messages; // `get_message_list()` can't return the list of messages stored in an OptimizedTranslation. optimized_translation->get_message_list(&messages); @@ -164,7 +188,7 @@ TEST_CASE("[TranslationCSV] CSV import") { Error result = import_csv_translation->import(0, TestUtils::get_data_path("translations.csv"), "", options, nullptr, &gen_files); CHECK(result == OK); - CHECK(gen_files.size() == 4); + CHECK(gen_files.size() == 5); TranslationServer *ts = TranslationServer::get_singleton(); @@ -190,6 +214,15 @@ TEST_CASE("[TranslationCSV] CSV import") { CHECK(ts->tr("GOOD_MORNING") == String::utf8("おはよう")); CHECK(ts->tr("GOOD_EVENING") == String::utf8("こんばんは")); + ts->set_locale("fr"); + ERR_PRINT_OFF; + // This is invalid, as the number passed to `translate_plural()` may not be negative. + CHECK(vformat(ts->translate_plural("There are %d apples", "", -1), -1) == ""); + ERR_PRINT_ON; + CHECK(vformat(ts->translate_plural("There are %d apples", "", 0), 0) == "Il y a 0 pomme"); + CHECK(vformat(ts->translate_plural("There are %d apples", "", 1), 1) == "Il y a 1 pomme"); + CHECK(vformat(ts->translate_plural("There are %d apples", "", 2), 2) == "Il y a 2 pommes"); + /* FIXME: This passes, but triggers a chain reaction that makes test_viewport * and test_text_edit explode in a billion glittery Unicode particles. 
ts->set_locale("fa"); diff --git a/tests/data/translations.csv b/tests/data/translations.csv index 6b5efc9b91c..a1bc438c444 100644 --- a/tests/data/translations.csv +++ b/tests/data/translations.csv @@ -1,3 +1,6 @@ -keys,en,de,ja,fa -GOOD_MORNING,"Good Morning","Guten Morgen","おはよう","صبح بخیر" -GOOD_EVENING,"Good Evening","","こんばんは","عصر بخیر" +keys,en,de,ja,fa,fr +_PluralRule,"","","","","nplurals=2; plural=(n >= 2);" +GOOD_MORNING,"Good Morning","Guten Morgen","おはよう","صبح بخیر","" +GOOD_EVENING,"Good Evening","","こんばんは","عصر بخیر","" +"There are %d apples","","","","","Il y a %d pomme" +"There are %d apples","","","","","Il y a %d pommes"