From 35e8fd4d047748acf98ddb5a7319579853d333ab Mon Sep 17 00:00:00 2001
From: bruvzg <7645683+bruvzg@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:19:17 +0200
Subject: [PATCH] Add binary MO translation file support.

---
 core/io/translation_loader_po.cpp | 477 ++++++++++++++++++------------
 1 file changed, 292 insertions(+), 185 deletions(-)
diff --git a/core/io/translation_loader_po.cpp b/core/io/translation_loader_po.cpp
index 8d3e58cad1b..801bd8b0bf1 100644
--- a/core/io/translation_loader_po.cpp
+++ b/core/io/translation_loader_po.cpp
@@ -35,98 +35,160 @@
 #include "core/string/translation_po.h"
 
 RES TranslationLoaderPO::load_translation(FileAccess *f, Error *r_error) {
-	enum Status {
-		STATUS_NONE,
-		STATUS_READING_ID,
-		STATUS_READING_STRING,
-		STATUS_READING_CONTEXT,
-		STATUS_READING_PLURAL,
-	};
-
-	Status status = STATUS_NONE;
-
-	String msg_id;
-	String msg_str;
-	String msg_context;
-	Vector<String> msgs_plural;
-	String config;
-
 	if (r_error) {
 		*r_error = ERR_FILE_CORRUPT;
 	}
 
-	Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO));
-	int line = 1;
-	int plural_forms = 0;
-	int plural_index = -1;
-	bool entered_context = false;
-	bool skip_this = false;
-	bool skip_next = false;
-	bool is_eof = false;
 	const String path = f->get_path();
+	Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO));
+	String config;
 
-	while (!is_eof) {
-		String l = f->get_line().strip_edges();
-		is_eof = f->eof_reached();
+	uint32_t magic = f->get_32();
+	if (magic == 0x950412de) {
+		// Load binary MO file.
 
-		// If we reached last line and it's not a content line, break, otherwise let processing that last loop
-		if (is_eof && l.is_empty()) {
-			if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line));
-			} else {
-				break;
-			}
+		uint16_t version_maj = f->get_16();
+		uint16_t version_min = f->get_16();
+		if (version_maj > 1) {
+			ERR_FAIL_V_MSG(RES(), vformat("Unsupported MO file %s, version %d.%d.", path, version_maj, version_min));
 		}
 
-		if (l.begins_with("msgctxt")) {
-			if (status != STATUS_READING_STRING && status != STATUS_READING_PLURAL) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));
-			}
+		uint32_t num_strings = f->get_32();
+		uint32_t id_table_offset = f->get_32();
+		uint32_t trans_table_offset = f->get_32();
 
-			// In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
-			// and set "entered_context" to true to prevent adding twice.
-			if (!skip_this && !msg_id.is_empty()) {
-				if (status == STATUS_READING_STRING) {
-					translation->add_message(msg_id, msg_str, msg_context);
-				} else if (status == STATUS_READING_PLURAL) {
-					if (plural_index != plural_forms - 1) {
-						memdelete(f);
-						ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
+		// Read string tables.
+		for (uint32_t i = 0; i < num_strings; i++) {
+			String msg_id;
+			String msg_id_plural;
+			String msg_context;
+
+			// Read id strings and context.
+			{
+				Vector<uint8_t> data;
+				f->seek(id_table_offset + i * 8);
+				uint32_t str_start = 0;
+				uint32_t str_len = f->get_32();
+				uint32_t str_offset = f->get_32();
+
+				data.resize(str_len + 1);
+				f->seek(str_offset);
+				f->get_buffer(data.ptrw(), str_len);
+				data.write[str_len] = 0;
+
+				bool is_plural = false;
+				for (uint32_t j = 0; j < str_len + 1; j++) {
+					if (data[j] == 0x04) {
+						msg_context.parse_utf8((const char *)data.ptr(), j);
+						str_start = j + 1;
+					}
+					if (data[j] == 0x00) {
+						if (is_plural) {
+							msg_id_plural.parse_utf8((const char *)(data.ptr() + str_start), j - str_start);
+						} else {
+							msg_id.parse_utf8((const char *)(data.ptr() + str_start), j - str_start);
+							is_plural = true;
+						}
+						str_start = j + 1;
+					}
+				}
+			}
+
+			// Read translated strings.
+			{
+				Vector<uint8_t> data;
+				f->seek(trans_table_offset + i * 8);
+				uint32_t str_start = 0;
+				uint32_t str_len = f->get_32();
+				uint32_t str_offset = f->get_32();
+
+				data.resize(str_len + 1);
+				f->seek(str_offset);
+				f->get_buffer(data.ptrw(), str_len);
+				data.write[str_len] = 0;
+
+				if (msg_id.is_empty()) {
+					config = String::utf8((const char *)data.ptr(), str_len);
+					// Record plural rule.
+					int p_start = config.find("Plural-Forms");
+					if (p_start != -1) {
+						int p_end = config.find("\n", p_start);
+						translation->set_plural_rule(config.substr(p_start, p_end - p_start));
+					}
+				} else {
+					Vector<String> plural_msg;
+					for (uint32_t j = 0; j < str_len + 1; j++) {
+						if (data[j] == 0x00) {
+							if (msg_id_plural.is_empty()) {
+								translation->add_message(msg_id, String::utf8((const char *)(data.ptr() + str_start), j - str_start), msg_context);
+							} else {
+								plural_msg.push_back(String::utf8((const char *)(data.ptr() + str_start), j - str_start));
+							}
+							str_start = j + 1;
+						}
+					}
+					if (!plural_msg.is_empty()) {
+						translation->add_plural_message(msg_id, plural_msg, msg_context);
 					}
-					translation->add_plural_message(msg_id, msgs_plural, msg_context);
 				}
 			}
-			msg_context = "";
-			l = l.substr(7, l.length()).strip_edges();
-			status = STATUS_READING_CONTEXT;
-			entered_context = true;
 		}
 
-		if (l.begins_with("msgid_plural")) {
-			if (plural_forms == 0) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line));
-			} else if (status != STATUS_READING_ID) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
-			}
-			// We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already.
-			// We just have to reset variables related to plurals for "msgstr[]" later on.
-			l = l.substr(12, l.length()).strip_edges();
-			plural_index = -1;
-			msgs_plural.clear();
-			msgs_plural.resize(plural_forms);
-			status = STATUS_READING_PLURAL;
-		} else if (l.begins_with("msgid")) {
-			if (status == STATUS_READING_ID) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));
+		memdelete(f);
+	} else {
+		// Try to load as text PO file.
+		f->seek(0);
+
+		enum Status {
+			STATUS_NONE,
+			STATUS_READING_ID,
+			STATUS_READING_STRING,
+			STATUS_READING_CONTEXT,
+			STATUS_READING_PLURAL,
+		};
+
+		Status status = STATUS_NONE;
+
+		String msg_id;
+		String msg_str;
+		String msg_context;
+		Vector<String> msgs_plural;
+
+		if (r_error) {
+			*r_error = ERR_FILE_CORRUPT;
+		}
+
+		int line = 1;
+		int plural_forms = 0;
+		int plural_index = -1;
+		bool entered_context = false;
+		bool skip_this = false;
+		bool skip_next = false;
+		bool is_eof = false;
+
+		while (!is_eof) {
+			String l = f->get_line().strip_edges();
+			is_eof = f->eof_reached();
+
+			// If we reached last line and it's not a content line, break, otherwise let processing that last loop
+			if (is_eof && l.is_empty()) {
+				if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line));
+				} else {
+					break;
+				}
 			}
 
-			if (!msg_id.is_empty()) {
-				if (!skip_this && !entered_context) {
+			if (l.begins_with("msgctxt")) {
+				if (status != STATUS_READING_STRING && status != STATUS_READING_PLURAL) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));
+				}
+
+				// In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
+				// and set "entered_context" to true to prevent adding twice.
+				if (!skip_this && !msg_id.is_empty()) {
 					if (status == STATUS_READING_STRING) {
 						translation->add_message(msg_id, msg_str, msg_context);
 					} else if (status == STATUS_READING_PLURAL) {
@@ -137,119 +199,163 @@ RES TranslationLoaderPO::load_translation(FileAccess *f, Error *r_error) {
 						translation->add_plural_message(msg_id, msgs_plural, msg_context);
 					}
 				}
+				msg_context = "";
+				l = l.substr(7, l.length()).strip_edges();
+				status = STATUS_READING_CONTEXT;
+				entered_context = true;
+			}
+
+			if (l.begins_with("msgid_plural")) {
+				if (plural_forms == 0) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line));
+				} else if (status != STATUS_READING_ID) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
+				}
+				// We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already.
+				// We just have to reset variables related to plurals for "msgstr[]" later on.
+				l = l.substr(12, l.length()).strip_edges();
+				plural_index = -1;
+				msgs_plural.clear();
+				msgs_plural.resize(plural_forms);
+				status = STATUS_READING_PLURAL;
+			} else if (l.begins_with("msgid")) {
+				if (status == STATUS_READING_ID) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));
+				}
+
+				if (!msg_id.is_empty()) {
+					if (!skip_this && !entered_context) {
+						if (status == STATUS_READING_STRING) {
+							translation->add_message(msg_id, msg_str, msg_context);
+						} else if (status == STATUS_READING_PLURAL) {
+							if (plural_index != plural_forms - 1) {
+								memdelete(f);
+								ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
+							}
+							translation->add_plural_message(msg_id, msgs_plural, msg_context);
+						}
+					}
+				} else if (config.is_empty()) {
+					config = msg_str;
+					// Record plural rule.
+					int p_start = config.find("Plural-Forms");
+					if (p_start != -1) {
+						int p_end = config.find("\n", p_start);
+						translation->set_plural_rule(config.substr(p_start, p_end - p_start));
+						plural_forms = translation->get_plural_forms();
+					}
+				}
+
+				l = l.substr(5, l.length()).strip_edges();
+				status = STATUS_READING_ID;
+				// If we did not encounter msgctxt, we reset context to empty to reset it.
+				if (!entered_context) {
+					msg_context = "";
+				}
+				msg_id = "";
+				msg_str = "";
+				skip_this = skip_next;
+				skip_next = false;
+				entered_context = false;
+			}
+
+			if (l.begins_with("msgstr[")) {
+				if (status != STATUS_READING_PLURAL) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
+				}
+				plural_index++; // Increment to add to the next slot in vector msgs_plural.
+				l = l.substr(9, l.length()).strip_edges();
+			} else if (l.begins_with("msgstr")) {
+				if (status != STATUS_READING_ID) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
+				}
+
+				l = l.substr(6, l.length()).strip_edges();
+				status = STATUS_READING_STRING;
+			}
+
+			if (l.is_empty() || l.begins_with("#")) {
+				if (l.contains("fuzzy")) {
+					skip_next = true;
+				}
+				line++;
+				continue; // Nothing to read or comment.
+			}
+
+			if (!l.begins_with("\"") || status == STATUS_NONE) {
+				memdelete(f);
+				ERR_FAIL_V_MSG(RES(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));
+			}
+
+			l = l.substr(1, l.length());
+			// Find final quote, ignoring escaped ones (\").
+			// The escape_next logic is necessary to properly parse things like \\"
+			// where the backslash is the one being escaped, not the quote.
+			int end_pos = -1;
+			bool escape_next = false;
+			for (int i = 0; i < l.length(); i++) {
+				if (l[i] == '\\' && !escape_next) {
+					escape_next = true;
+					continue;
+				}
+
+				if (l[i] == '"' && !escape_next) {
+					end_pos = i;
+					break;
+				}
+
+				escape_next = false;
+			}
+
+			if (end_pos == -1) {
+				memdelete(f);
+				ERR_FAIL_V_MSG(RES(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));
+			}
+
+			l = l.substr(0, end_pos);
+			l = l.c_unescape();
+
+			if (status == STATUS_READING_ID) {
+				msg_id += l;
+			} else if (status == STATUS_READING_STRING) {
+				msg_str += l;
+			} else if (status == STATUS_READING_CONTEXT) {
+				msg_context += l;
+			} else if (status == STATUS_READING_PLURAL && plural_index >= 0) {
+				if (plural_index >= plural_forms) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Unexpected plural form while parsing: " + path + ":" + itos(line));
+				}
+				msgs_plural.write[plural_index] = msgs_plural[plural_index] + l;
+			}
+
+			line++;
+		}
+
+		memdelete(f);
+
+		// Add the last set of data from last iteration.
+		if (status == STATUS_READING_STRING) {
+			if (!msg_id.is_empty()) {
+				if (!skip_this) {
+					translation->add_message(msg_id, msg_str, msg_context);
+				}
 			} else if (config.is_empty()) {
 				config = msg_str;
-				// Record plural rule.
-				int p_start = config.find("Plural-Forms");
-				if (p_start != -1) {
-					int p_end = config.find("\n", p_start);
-					translation->set_plural_rule(config.substr(p_start, p_end - p_start));
-					plural_forms = translation->get_plural_forms();
+			}
+		} else if (status == STATUS_READING_PLURAL) {
+			if (!skip_this && !msg_id.is_empty()) {
+				if (plural_index != plural_forms - 1) {
+					memdelete(f);
+					ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
 				}
+				translation->add_plural_message(msg_id, msgs_plural, msg_context);
 			}
-
-			l = l.substr(5, l.length()).strip_edges();
-			status = STATUS_READING_ID;
-			// If we did not encounter msgctxt, we reset context to empty to reset it.
-			if (!entered_context) {
-				msg_context = "";
-			}
-			msg_id = "";
-			msg_str = "";
-			skip_this = skip_next;
-			skip_next = false;
-			entered_context = false;
-		}
-
-		if (l.begins_with("msgstr[")) {
-			if (status != STATUS_READING_PLURAL) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
-			}
-			plural_index++; // Increment to add to the next slot in vector msgs_plural.
-			l = l.substr(9, l.length()).strip_edges();
-		} else if (l.begins_with("msgstr")) {
-			if (status != STATUS_READING_ID) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
-			}
-
-			l = l.substr(6, l.length()).strip_edges();
-			status = STATUS_READING_STRING;
-		}
-
-		if (l.is_empty() || l.begins_with("#")) {
-			if (l.contains("fuzzy")) {
-				skip_next = true;
-			}
-			line++;
-			continue; // Nothing to read or comment.
-		}
-
-		if (!l.begins_with("\"") || status == STATUS_NONE) {
-			memdelete(f);
-			ERR_FAIL_V_MSG(RES(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));
-		}
-
-		l = l.substr(1, l.length());
-		// Find final quote, ignoring escaped ones (\").
-		// The escape_next logic is necessary to properly parse things like \\"
-		// where the backslash is the one being escaped, not the quote.
-		int end_pos = -1;
-		bool escape_next = false;
-		for (int i = 0; i < l.length(); i++) {
-			if (l[i] == '\\' && !escape_next) {
-				escape_next = true;
-				continue;
-			}
-
-			if (l[i] == '"' && !escape_next) {
-				end_pos = i;
-				break;
-			}
-
-			escape_next = false;
-		}
-
-		if (end_pos == -1) {
-			memdelete(f);
-			ERR_FAIL_V_MSG(RES(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));
-		}
-
-		l = l.substr(0, end_pos);
-		l = l.c_unescape();
-
-		if (status == STATUS_READING_ID) {
-			msg_id += l;
-		} else if (status == STATUS_READING_STRING) {
-			msg_str += l;
-		} else if (status == STATUS_READING_CONTEXT) {
-			msg_context += l;
-		} else if (status == STATUS_READING_PLURAL && plural_index >= 0) {
-			msgs_plural.write[plural_index] = msgs_plural[plural_index] + l;
-		}
-
-		line++;
-	}
-
-	memdelete(f);
-
-	// Add the last set of data from last iteration.
-	if (status == STATUS_READING_STRING) {
-		if (!msg_id.is_empty()) {
-			if (!skip_this) {
-				translation->add_message(msg_id, msg_str, msg_context);
-			}
-		} else if (config.is_empty()) {
-			config = msg_str;
-		}
-	} else if (status == STATUS_READING_PLURAL) {
-		if (!skip_this && !msg_id.is_empty()) {
-			if (plural_index != plural_forms - 1) {
-				memdelete(f);
-				ERR_FAIL_V_MSG(RES(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
-			}
-			translation->add_plural_message(msg_id, msgs_plural, msg_context);
 		}
 	}
 
@@ -290,6 +396,7 @@ RES TranslationLoaderPO::load(const String &p_path, const String &p_original_pat
 
 void TranslationLoaderPO::get_recognized_extensions(List<String> *p_extensions) const {
 	p_extensions->push_back("po");
+	p_extensions->push_back("mo");
 }
 
 bool TranslationLoaderPO::handles_type(const String &p_type) const {
@@ -297,7 +404,7 @@ bool TranslationLoaderPO::handles_type(const String &p_type) const {
 }
 
 String TranslationLoaderPO::get_resource_type(const String &p_path) const {
-	if (p_path.get_extension().to_lower() == "po") {
+	if (p_path.get_extension().to_lower() == "po" || p_path.get_extension().to_lower() == "mo") {
 		return "Translation";
 	}
 	return "";