From 35e8fd4d047748acf98ddb5a7319579853d333ab Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Fri, 18 Mar 2022 14:19:17 +0200 Subject: [PATCH] Add binary MO translation file support. --- core/io/translation_loader_po.cpp | 477 ++++++++++++++++++------------ 1 file changed, 292 insertions(+), 185 deletions(-) diff --git a/core/io/translation_loader_po.cpp b/core/io/translation_loader_po.cpp index 8d3e58cad1b..801bd8b0bf1 100644 --- a/core/io/translation_loader_po.cpp +++ b/core/io/translation_loader_po.cpp @@ -35,98 +35,160 @@ #include "core/string/translation_po.h" RES TranslationLoaderPO::load_translation(FileAccess *f, Error *r_error) { - enum Status { - STATUS_NONE, - STATUS_READING_ID, - STATUS_READING_STRING, - STATUS_READING_CONTEXT, - STATUS_READING_PLURAL, - }; - - Status status = STATUS_NONE; - - String msg_id; - String msg_str; - String msg_context; - Vector<String> msgs_plural; - String config; - if (r_error) { *r_error = ERR_FILE_CORRUPT; } - Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO)); - int line = 1; - int plural_forms = 0; - int plural_index = -1; - bool entered_context = false; - bool skip_this = false; - bool skip_next = false; - bool is_eof = false; const String path = f->get_path(); + Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO)); + String config; - while (!is_eof) { - String l = f->get_line().strip_edges(); - is_eof = f->eof_reached(); + uint32_t magic = f->get_32(); + if (magic == 0x950412de) { + // Load binary MO file. 
- // If we reached last line and it's not a content line, break, otherwise let processing that last loop - if (is_eof && l.is_empty()) { - if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line)); - } else { - break; - } + uint16_t version_maj = f->get_16(); + uint16_t version_min = f->get_16(); + if (version_maj > 1) { + ERR_FAIL_V_MSG(RES(), vformat("Unsupported MO file %s, version %d.%d.", path, version_maj, version_min)); } - if (l.begins_with("msgctxt")) { - if (status != STATUS_READING_STRING && status != STATUS_READING_PLURAL) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line)); - } + uint32_t num_strings = f->get_32(); + uint32_t id_table_offset = f->get_32(); + uint32_t trans_table_offset = f->get_32(); - // In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read - // and set "entered_context" to true to prevent adding twice. - if (!skip_this && !msg_id.is_empty()) { - if (status == STATUS_READING_STRING) { - translation->add_message(msg_id, msg_str, msg_context); - } else if (status == STATUS_READING_PLURAL) { - if (plural_index != plural_forms - 1) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line)); + // Read string tables. + for (uint32_t i = 0; i < num_strings; i++) { + String msg_id; + String msg_id_plural; + String msg_context; + + // Read id strings and context. 
+ { + Vector<uint8_t> data; + f->seek(id_table_offset + i * 8); + uint32_t str_start = 0; + uint32_t str_len = f->get_32(); + uint32_t str_offset = f->get_32(); + + data.resize(str_len + 1); + f->seek(str_offset); + f->get_buffer(data.ptrw(), str_len); + data.write[str_len] = 0; + + bool is_plural = false; + for (uint32_t j = 0; j < str_len + 1; j++) { + if (data[j] == 0x04) { + msg_context.parse_utf8((const char *)data.ptr(), j); + str_start = j + 1; + } + if (data[j] == 0x00) { + if (is_plural) { + msg_id_plural.parse_utf8((const char *)(data.ptr() + str_start), j - str_start); + } else { + msg_id.parse_utf8((const char *)(data.ptr() + str_start), j - str_start); + is_plural = true; + } + str_start = j + 1; + } + } + } + + // Read translated strings. + { + Vector<uint8_t> data; + f->seek(trans_table_offset + i * 8); + uint32_t str_start = 0; + uint32_t str_len = f->get_32(); + uint32_t str_offset = f->get_32(); + + data.resize(str_len + 1); + f->seek(str_offset); + f->get_buffer(data.ptrw(), str_len); + data.write[str_len] = 0; + + if (msg_id.is_empty()) { + config = String::utf8((const char *)data.ptr(), str_len); + // Record plural rule. 
+ int p_start = config.find("Plural-Forms"); + if (p_start != -1) { + int p_end = config.find("\n", p_start); + translation->set_plural_rule(config.substr(p_start, p_end - p_start)); + } + } else { + Vector<String> plural_msg; + for (uint32_t j = 0; j < str_len + 1; j++) { + if (data[j] == 0x00) { + if (msg_id_plural.is_empty()) { + translation->add_message(msg_id, String::utf8((const char *)(data.ptr() + str_start), j - str_start), msg_context); + } else { + plural_msg.push_back(String::utf8((const char *)(data.ptr() + str_start), j - str_start)); + } + str_start = j + 1; + } + } + if (!plural_msg.is_empty()) { + translation->add_plural_message(msg_id, plural_msg, msg_context); } - translation->add_plural_message(msg_id, msgs_plural, msg_context); } } - msg_context = ""; - l = l.substr(7, l.length()).strip_edges(); - status = STATUS_READING_CONTEXT; - entered_context = true; } - if (l.begins_with("msgid_plural")) { - if (plural_forms == 0) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line)); - } else if (status != STATUS_READING_ID) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line)); - } - // We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already. - // We just have to reset variables related to plurals for "msgstr[]" later on. - l = l.substr(12, l.length()).strip_edges(); - plural_index = -1; - msgs_plural.clear(); - msgs_plural.resize(plural_forms); - status = STATUS_READING_PLURAL; - } else if (l.begins_with("msgid")) { - if (status == STATUS_READING_ID) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line)); + memdelete(f); + } else { + // Try to load as text PO file. 
+ f->seek(0); + + enum Status { + STATUS_NONE, + STATUS_READING_ID, + STATUS_READING_STRING, + STATUS_READING_CONTEXT, + STATUS_READING_PLURAL, + }; + + Status status = STATUS_NONE; + + String msg_id; + String msg_str; + String msg_context; + Vector<String> msgs_plural; + + if (r_error) { + *r_error = ERR_FILE_CORRUPT; + } + + int line = 1; + int plural_forms = 0; + int plural_index = -1; + bool entered_context = false; + bool skip_this = false; + bool skip_next = false; + bool is_eof = false; + + while (!is_eof) { + String l = f->get_line().strip_edges(); + is_eof = f->eof_reached(); + + // If we reached last line and it's not a content line, break, otherwise let processing that last loop + if (is_eof && l.is_empty()) { + if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line)); + } else { + break; + } } - if (!msg_id.is_empty()) { - if (!skip_this && !entered_context) { + if (l.begins_with("msgctxt")) { + if (status != STATUS_READING_STRING && status != STATUS_READING_PLURAL) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line)); + } + + // In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read + // and set "entered_context" to true to prevent adding twice. 
+ if (!skip_this && !msg_id.is_empty()) { if (status == STATUS_READING_STRING) { translation->add_message(msg_id, msg_str, msg_context); } else if (status == STATUS_READING_PLURAL) { @@ -137,119 +199,163 @@ RES TranslationLoaderPO::load_translation(FileAccess *f, Error *r_error) { translation->add_plural_message(msg_id, msgs_plural, msg_context); } } + msg_context = ""; + l = l.substr(7, l.length()).strip_edges(); + status = STATUS_READING_CONTEXT; + entered_context = true; + } + + if (l.begins_with("msgid_plural")) { + if (plural_forms == 0) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line)); + } else if (status != STATUS_READING_ID) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line)); + } + // We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already. + // We just have to reset variables related to plurals for "msgstr[]" later on. 
+ l = l.substr(12, l.length()).strip_edges(); + plural_index = -1; + msgs_plural.clear(); + msgs_plural.resize(plural_forms); + status = STATUS_READING_PLURAL; + } else if (l.begins_with("msgid")) { + if (status == STATUS_READING_ID) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line)); + } + + if (!msg_id.is_empty()) { + if (!skip_this && !entered_context) { + if (status == STATUS_READING_STRING) { + translation->add_message(msg_id, msg_str, msg_context); + } else if (status == STATUS_READING_PLURAL) { + if (plural_index != plural_forms - 1) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line)); + } + translation->add_plural_message(msg_id, msgs_plural, msg_context); + } + } + } else if (config.is_empty()) { + config = msg_str; + // Record plural rule. + int p_start = config.find("Plural-Forms"); + if (p_start != -1) { + int p_end = config.find("\n", p_start); + translation->set_plural_rule(config.substr(p_start, p_end - p_start)); + plural_forms = translation->get_plural_forms(); + } + } + + l = l.substr(5, l.length()).strip_edges(); + status = STATUS_READING_ID; + // If we did not encounter msgctxt, we reset context to empty to reset it. + if (!entered_context) { + msg_context = ""; + } + msg_id = ""; + msg_str = ""; + skip_this = skip_next; + skip_next = false; + entered_context = false; + } + + if (l.begins_with("msgstr[")) { + if (status != STATUS_READING_PLURAL) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line)); + } + plural_index++; // Increment to add to the next slot in vector msgs_plural. 
+ l = l.substr(9, l.length()).strip_edges(); + } else if (l.begins_with("msgstr")) { + if (status != STATUS_READING_ID) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line)); + } + + l = l.substr(6, l.length()).strip_edges(); + status = STATUS_READING_STRING; + } + + if (l.is_empty() || l.begins_with("#")) { + if (l.contains("fuzzy")) { + skip_next = true; + } + line++; + continue; // Nothing to read or comment. + } + + if (!l.begins_with("\"") || status == STATUS_NONE) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line)); + } + + l = l.substr(1, l.length()); + // Find final quote, ignoring escaped ones (\"). + // The escape_next logic is necessary to properly parse things like \\" + // where the backslash is the one being escaped, not the quote. + int end_pos = -1; + bool escape_next = false; + for (int i = 0; i < l.length(); i++) { + if (l[i] == '\\' && !escape_next) { + escape_next = true; + continue; + } + + if (l[i] == '"' && !escape_next) { + end_pos = i; + break; + } + + escape_next = false; + } + + if (end_pos == -1) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line)); + } + + l = l.substr(0, end_pos); + l = l.c_unescape(); + + if (status == STATUS_READING_ID) { + msg_id += l; + } else if (status == STATUS_READING_STRING) { + msg_str += l; + } else if (status == STATUS_READING_CONTEXT) { + msg_context += l; + } else if (status == STATUS_READING_PLURAL && plural_index >= 0) { + if (plural_index >= plural_forms) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Unexpected plural form while parsing: " + path + ":" + itos(line)); + } + msgs_plural.write[plural_index] = msgs_plural[plural_index] + l; + } + + line++; + } + + memdelete(f); + + // Add the last set of data from last iteration. 
+ if (status == STATUS_READING_STRING) { + if (!msg_id.is_empty()) { + if (!skip_this) { + translation->add_message(msg_id, msg_str, msg_context); + } } else if (config.is_empty()) { config = msg_str; - // Record plural rule. - int p_start = config.find("Plural-Forms"); - if (p_start != -1) { - int p_end = config.find("\n", p_start); - translation->set_plural_rule(config.substr(p_start, p_end - p_start)); - plural_forms = translation->get_plural_forms(); + } + } else if (status == STATUS_READING_PLURAL) { + if (!skip_this && !msg_id.is_empty()) { + if (plural_index != plural_forms - 1) { + memdelete(f); + ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line)); } + translation->add_plural_message(msg_id, msgs_plural, msg_context); } - - l = l.substr(5, l.length()).strip_edges(); - status = STATUS_READING_ID; - // If we did not encounter msgctxt, we reset context to empty to reset it. - if (!entered_context) { - msg_context = ""; - } - msg_id = ""; - msg_str = ""; - skip_this = skip_next; - skip_next = false; - entered_context = false; - } - - if (l.begins_with("msgstr[")) { - if (status != STATUS_READING_PLURAL) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line)); - } - plural_index++; // Increment to add to the next slot in vector msgs_plural. - l = l.substr(9, l.length()).strip_edges(); - } else if (l.begins_with("msgstr")) { - if (status != STATUS_READING_ID) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line)); - } - - l = l.substr(6, l.length()).strip_edges(); - status = STATUS_READING_STRING; - } - - if (l.is_empty() || l.begins_with("#")) { - if (l.contains("fuzzy")) { - skip_next = true; - } - line++; - continue; // Nothing to read or comment. 
- } - - if (!l.begins_with("\"") || status == STATUS_NONE) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line)); - } - - l = l.substr(1, l.length()); - // Find final quote, ignoring escaped ones (\"). - // The escape_next logic is necessary to properly parse things like \\" - // where the backslash is the one being escaped, not the quote. - int end_pos = -1; - bool escape_next = false; - for (int i = 0; i < l.length(); i++) { - if (l[i] == '\\' && !escape_next) { - escape_next = true; - continue; - } - - if (l[i] == '"' && !escape_next) { - end_pos = i; - break; - } - - escape_next = false; - } - - if (end_pos == -1) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line)); - } - - l = l.substr(0, end_pos); - l = l.c_unescape(); - - if (status == STATUS_READING_ID) { - msg_id += l; - } else if (status == STATUS_READING_STRING) { - msg_str += l; - } else if (status == STATUS_READING_CONTEXT) { - msg_context += l; - } else if (status == STATUS_READING_PLURAL && plural_index >= 0) { - msgs_plural.write[plural_index] = msgs_plural[plural_index] + l; - } - - line++; - } - - memdelete(f); - - // Add the last set of data from last iteration. 
- if (status == STATUS_READING_STRING) { - if (!msg_id.is_empty()) { - if (!skip_this) { - translation->add_message(msg_id, msg_str, msg_context); - } - } else if (config.is_empty()) { - config = msg_str; - } - } else if (status == STATUS_READING_PLURAL) { - if (!skip_this && !msg_id.is_empty()) { - if (plural_index != plural_forms - 1) { - memdelete(f); - ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line)); - } - translation->add_plural_message(msg_id, msgs_plural, msg_context); } } @@ -290,6 +396,7 @@ RES TranslationLoaderPO::load(const String &p_path, const String &p_original_pat void TranslationLoaderPO::get_recognized_extensions(List<String> *p_extensions) const { p_extensions->push_back("po"); + p_extensions->push_back("mo"); } bool TranslationLoaderPO::handles_type(const String &p_type) const { @@ -297,7 +404,7 @@ bool TranslationLoaderPO::handles_type(const String &p_type) const { } String TranslationLoaderPO::get_resource_type(const String &p_path) const { - if (p_path.get_extension().to_lower() == "po") { + if (p_path.get_extension().to_lower() == "po" || p_path.get_extension().to_lower() == "mo") { return "Translation"; } return "";