From 7bdae7559d8b37e11818286a84edb1a11bcca141 Mon Sep 17 00:00:00 2001 From: Michael Alexsander Date: Wed, 3 Apr 2024 01:16:11 -0300 Subject: [PATCH] Improve string extraction of ETR POT file --- editor/editor_translation.cpp | 166 +++++++++++++++++++++++++++++++--- editor/editor_translation.h | 2 +- editor/pot_generator.cpp | 7 +- 3 files changed, 154 insertions(+), 21 deletions(-) diff --git a/editor/editor_translation.cpp b/editor/editor_translation.cpp index 194d78326d5..77154ec3443 100644 --- a/editor/editor_translation.cpp +++ b/editor/editor_translation.cpp @@ -156,30 +156,166 @@ void load_extractable_translations(const String &p_locale) { } } -List<StringName> get_extractable_message_list() { +Vector<Vector<String>> get_extractable_message_list() { ExtractableTranslationList *etl = _extractable_translations; - List<StringName> msgids; + Vector<Vector<String>> list; + while (etl->data) { - if (!strcmp(etl->lang, "source")) { - Vector<uint8_t> data; - data.resize(etl->uncomp_size); - int ret = Compression::decompress(data.ptrw(), etl->uncomp_size, etl->data, etl->comp_size, Compression::MODE_DEFLATE); - ERR_FAIL_COND_V_MSG(ret == -1, msgids, "Compressed file is corrupt."); + if (strcmp(etl->lang, "source")) { + etl++; + continue; + } - Ref<FileAccessMemory> fa; - fa.instantiate(); - fa->open_custom(data.ptr(), data.size()); + Vector<uint8_t> data; + data.resize(etl->uncomp_size); + int ret = Compression::decompress(data.ptrw(), etl->uncomp_size, etl->data, etl->comp_size, Compression::MODE_DEFLATE); + ERR_FAIL_COND_V_MSG(ret == -1, list, "Compressed file is corrupt."); - Ref<Translation> tr = TranslationLoaderPO::load_translation(fa); + Ref<FileAccessMemory> fa; + fa.instantiate(); + fa->open_custom(data.ptr(), data.size()); - if (tr.is_valid()) { - tr->get_message_list(&msgids); - break; + // Taken from TranslationLoaderPO, modified to work specifically with POTs. 
+ { + const String path = fa->get_path(); + + fa->seek(0); + + enum Status { + STATUS_NONE, + STATUS_READING_ID, + STATUS_READING_STRING, + STATUS_READING_CONTEXT, + STATUS_READING_PLURAL, + }; + + Status status = STATUS_NONE; + + String msg_id; + String msg_id_plural; + String msg_context; + + int line = 1; + bool entered_context = false; + bool is_eof = false; + + while (!is_eof) { + String l = fa->get_line().strip_edges(); + is_eof = fa->eof_reached(); + + // If we reached last line and it's not a content line, break, otherwise let processing that last loop. + if (is_eof && l.is_empty()) { + if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || status == STATUS_READING_PLURAL) { + ERR_FAIL_V_MSG(Vector<Vector<String>>(), "Unexpected EOF while reading POT file at: " + path + ":" + itos(line)); + } else { + break; + } + } + + if (l.begins_with("msgctxt")) { + ERR_FAIL_COND_V_MSG(status != STATUS_READING_STRING && status != STATUS_READING_PLURAL, Vector<Vector<String>>(), + "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line)); + + // In POT files, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read + // and set "entered_context" to true to prevent adding twice. 
+ if (!msg_id.is_empty()) { + Vector<String> msgs; + msgs.push_back(msg_id); + msgs.push_back(msg_context); + msgs.push_back(msg_id_plural); + list.push_back(msgs); + } + msg_context = ""; + l = l.substr(7, l.length()).strip_edges(); + status = STATUS_READING_CONTEXT; + entered_context = true; + } + + if (l.begins_with("msgid_plural")) { + if (status != STATUS_READING_ID) { + ERR_FAIL_V_MSG(Vector<Vector<String>>(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line)); + } + l = l.substr(12, l.length()).strip_edges(); + status = STATUS_READING_PLURAL; + } else if (l.begins_with("msgid")) { + ERR_FAIL_COND_V_MSG(status == STATUS_READING_ID, Vector<Vector<String>>(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line)); + + if (!msg_id.is_empty() && !entered_context) { + Vector<String> msgs; + msgs.push_back(msg_id); + msgs.push_back(msg_context); + msgs.push_back(msg_id_plural); + list.push_back(msgs); + } + + l = l.substr(5, l.length()).strip_edges(); + status = STATUS_READING_ID; + // If we did not encounter msgctxt, we reset context to empty to reset it. + if (!entered_context) { + msg_context = ""; + } + msg_id = ""; + msg_id_plural = ""; + entered_context = false; + } + + if (l.begins_with("msgstr[")) { + ERR_FAIL_COND_V_MSG(status != STATUS_READING_PLURAL, Vector<Vector<String>>(), + "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line)); + l = l.substr(9, l.length()).strip_edges(); + } else if (l.begins_with("msgstr")) { + ERR_FAIL_COND_V_MSG(status != STATUS_READING_ID, Vector<Vector<String>>(), + "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line)); + l = l.substr(6, l.length()).strip_edges(); + status = STATUS_READING_STRING; + } + + if (l.is_empty() || l.begins_with("#")) { + line++; + continue; // Nothing to read or comment. 
+ } + + ERR_FAIL_COND_V_MSG(!l.begins_with("\"") || status == STATUS_NONE, Vector<Vector<String>>(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line)); + + l = l.substr(1, l.length()); + // Find final quote, ignoring escaped ones (\"). + // The escape_next logic is necessary to properly parse things like \\" + // where the backslash is the one being escaped, not the quote. + int end_pos = -1; + bool escape_next = false; + for (int i = 0; i < l.length(); i++) { + if (l[i] == '\\' && !escape_next) { + escape_next = true; + continue; + } + + if (l[i] == '"' && !escape_next) { + end_pos = i; + break; + } + + escape_next = false; + } + + ERR_FAIL_COND_V_MSG(end_pos == -1, Vector<Vector<String>>(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line)); + + l = l.substr(0, end_pos); + l = l.c_unescape(); + + if (status == STATUS_READING_ID) { + msg_id += l; + } else if (status == STATUS_READING_CONTEXT) { + msg_context += l; + } else if (status == STATUS_READING_PLURAL) { + msg_id_plural += l; + } + + line++; } } etl++; } - return msgids; + return list; } diff --git a/editor/editor_translation.h b/editor/editor_translation.h index 4785495629b..eee9e533c48 100644 --- a/editor/editor_translation.h +++ b/editor/editor_translation.h @@ -40,6 +40,6 @@ void load_editor_translations(const String &p_locale); void load_property_translations(const String &p_locale); void load_doc_translations(const String &p_locale); void load_extractable_translations(const String &p_locale); -List<StringName> get_extractable_message_list(); +Vector<Vector<String>> get_extractable_message_list(); #endif // EDITOR_TRANSLATION_H diff --git a/editor/pot_generator.cpp b/editor/pot_generator.cpp index 8323ae944be..76b6593f1d5 100644 --- a/editor/pot_generator.cpp +++ b/editor/pot_generator.cpp @@ -34,7 +34,6 @@ #include "core/error/error_macros.h" #include "editor/editor_translation.h" #include "editor/editor_translation_parser.h" -#include "plugins/packed_scene_translation_parser_plugin.h" POTGenerator 
*POTGenerator::singleton = nullptr; @@ -66,8 +65,6 @@ void POTGenerator::generate_pot(const String &p_file) { // Clear all_translation_strings of the previous round. all_translation_strings.clear(); - List<StringName> extractable_msgids = get_extractable_message_list(); - // Collect all translatable strings according to files order in "POT Generation" setting. for (int i = 0; i < files.size(); i++) { Vector<String> msgids; @@ -92,8 +89,8 @@ void POTGenerator::generate_pot(const String &p_file) { } if (GLOBAL_GET("internationalization/locale/translation_add_builtin_strings_to_pot")) { - for (const StringName &extractable_msgid : extractable_msgids) { - _add_new_msgid(extractable_msgid, "", "", ""); + for (const Vector<String> &extractable_msgids : get_extractable_message_list()) { + _add_new_msgid(extractable_msgids[0], extractable_msgids[1], extractable_msgids[2], ""); } }