Improve string extraction of ETR POT file

This commit is contained in:
Michael Alexsander 2024-04-03 01:16:11 -03:00
parent 9c7e4031c0
commit 7bdae7559d
No known key found for this signature in database
GPG Key ID: A9C91EE110F4EABA
3 changed files with 154 additions and 21 deletions

View File

@ -156,30 +156,166 @@ void load_extractable_translations(const String &p_locale) {
}
}
// Collects the messages of the embedded "source" extractable translation (a POT template).
// Each returned entry is a Vector<String> holding { msgid, msgctxt, msgid_plural }, in that order.
// On a decompression failure the list gathered so far is returned; on a parse error an empty list is returned.
// NOTE(review): this listing interleaves the removed (List<StringName>) and added (Vector<Vector<String>>)
// lines of the change, so both variants of some statements appear below.
List<StringName> get_extractable_message_list() {
Vector<Vector<String>> get_extractable_message_list() {
// Walk the translation table until the entry whose data pointer is null.
ExtractableTranslationList *etl = _extractable_translations;
List<StringName> msgids;
Vector<Vector<String>> list;
while (etl->data) {
if (!strcmp(etl->lang, "source")) {
Vector<uint8_t> data;
data.resize(etl->uncomp_size);
int ret = Compression::decompress(data.ptrw(), etl->uncomp_size, etl->data, etl->comp_size, Compression::MODE_DEFLATE);
ERR_FAIL_COND_V_MSG(ret == -1, msgids, "Compressed file is corrupt.");
// strcmp() is non-zero when the strings differ: skip every entry that is not the "source" template.
if (strcmp(etl->lang, "source")) {
etl++;
continue;
}
Ref<FileAccessMemory> fa;
fa.instantiate();
fa->open_custom(data.ptr(), data.size());
// Inflate the DEFLATE-compressed template into a buffer of its recorded uncompressed size.
Vector<uint8_t> data;
data.resize(etl->uncomp_size);
int ret = Compression::decompress(data.ptrw(), etl->uncomp_size, etl->data, etl->comp_size, Compression::MODE_DEFLATE);
ERR_FAIL_COND_V_MSG(ret == -1, list, "Compressed file is corrupt.");
Ref<Translation> tr = TranslationLoaderPO::load_translation(fa);
// Expose the decompressed bytes through an in-memory file so they can be read line by line.
Ref<FileAccessMemory> fa;
fa.instantiate();
fa->open_custom(data.ptr(), data.size());
if (tr.is_valid()) {
tr->get_message_list(&msgids);
break;
// Taken from TranslationLoaderPO, modified to work specifically with POTs.
{
const String path = fa->get_path();
fa->seek(0);
// Which keyword's quoted text is currently being accumulated.
enum Status {
STATUS_NONE,
STATUS_READING_ID,
STATUS_READING_STRING,
STATUS_READING_CONTEXT,
STATUS_READING_PLURAL,
};
Status status = STATUS_NONE;
String msg_id;
String msg_id_plural;
String msg_context;
// 1-based line counter, used only in error messages.
int line = 1;
// True between a "msgctxt" line and the "msgid" that follows it.
bool entered_context = false;
bool is_eof = false;
while (!is_eof) {
String l = fa->get_line().strip_edges();
is_eof = fa->eof_reached();
// If we reached the last line and it has no content, stop; otherwise let the loop process that final line.
// Ending in the middle of an id, context or plural is reported as an error.
if (is_eof && l.is_empty()) {
if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || status == STATUS_READING_PLURAL) {
ERR_FAIL_V_MSG(Vector<Vector<String>>(), "Unexpected EOF while reading POT file at: " + path + ":" + itos(line));
} else {
break;
}
}
if (l.begins_with("msgctxt")) {
ERR_FAIL_COND_V_MSG(status != STATUS_READING_STRING && status != STATUS_READING_PLURAL, Vector<Vector<String>>(),
"Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));
// In POT files, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
// and set "entered_context" to true to prevent adding twice.
if (!msg_id.is_empty()) {
Vector<String> msgs;
msgs.push_back(msg_id);
msgs.push_back(msg_context);
msgs.push_back(msg_id_plural);
list.push_back(msgs);
}
msg_context = "";
// Drop the 7-character "msgctxt" keyword, leaving the quoted text.
l = l.substr(7, l.length()).strip_edges();
status = STATUS_READING_CONTEXT;
entered_context = true;
}
// "msgid_plural" must be tested before "msgid", since it also begins with "msgid".
if (l.begins_with("msgid_plural")) {
if (status != STATUS_READING_ID) {
ERR_FAIL_V_MSG(Vector<Vector<String>>(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
}
// Drop the 12-character "msgid_plural" keyword.
l = l.substr(12, l.length()).strip_edges();
status = STATUS_READING_PLURAL;
} else if (l.begins_with("msgid")) {
ERR_FAIL_COND_V_MSG(status == STATUS_READING_ID, Vector<Vector<String>>(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));
// A new "msgid" ends the previous entry; store it unless the preceding "msgctxt" already did.
// An empty msg_id is never stored.
if (!msg_id.is_empty() && !entered_context) {
Vector<String> msgs;
msgs.push_back(msg_id);
msgs.push_back(msg_context);
msgs.push_back(msg_id_plural);
list.push_back(msgs);
}
// Drop the 5-character "msgid" keyword.
l = l.substr(5, l.length()).strip_edges();
status = STATUS_READING_ID;
// If no "msgctxt" preceded this "msgid", clear the context left over from the previous entry.
if (!entered_context) {
msg_context = "";
}
msg_id = "";
msg_id_plural = "";
entered_context = false;
}
// "msgstr[" must be tested before "msgstr", since both share the same prefix.
if (l.begins_with("msgstr[")) {
ERR_FAIL_COND_V_MSG(status != STATUS_READING_PLURAL, Vector<Vector<String>>(),
"Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
// Skips 9 characters, i.e. "msgstr[N]" with a one-character index.
// NOTE(review): status stays STATUS_READING_PLURAL here, so quoted text on "msgstr[N]" lines is
// appended to msg_id_plural — presumably harmless because POT msgstr values are empty; confirm.
l = l.substr(9, l.length()).strip_edges();
} else if (l.begins_with("msgstr")) {
ERR_FAIL_COND_V_MSG(status != STATUS_READING_ID, Vector<Vector<String>>(),
"Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
// Drop the 6-character "msgstr" keyword.
l = l.substr(6, l.length()).strip_edges();
status = STATUS_READING_STRING;
}
if (l.is_empty() || l.begins_with("#")) {
line++;
continue; // Nothing to read or comment.
}
// Whatever is left must be a quoted string that belongs to a previously seen keyword.
ERR_FAIL_COND_V_MSG(!l.begins_with("\"") || status == STATUS_NONE, Vector<Vector<String>>(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));
// Strip the opening quote.
l = l.substr(1, l.length());
// Find final quote, ignoring escaped ones (\").
// The escape_next logic is necessary to properly parse things like \\"
// where the backslash is the one being escaped, not the quote.
int end_pos = -1;
bool escape_next = false;
for (int i = 0; i < l.length(); i++) {
if (l[i] == '\\' && !escape_next) {
escape_next = true;
continue;
}
if (l[i] == '"' && !escape_next) {
end_pos = i;
break;
}
escape_next = false;
}
ERR_FAIL_COND_V_MSG(end_pos == -1, Vector<Vector<String>>(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));
// Keep only the text before the closing quote; anything after it on the line is ignored.
l = l.substr(0, end_pos);
l = l.c_unescape();
// Append to the field selected by the current keyword. There is no branch for
// STATUS_READING_STRING, so "msgstr" text is discarded.
if (status == STATUS_READING_ID) {
msg_id += l;
} else if (status == STATUS_READING_CONTEXT) {
msg_context += l;
} else if (status == STATUS_READING_PLURAL) {
msg_id_plural += l;
}
line++;
}
// NOTE(review): entries are pushed only when a following "msgctxt"/"msgid" is seen and nothing is
// pushed here after the loop, so the last entry of the file appears to be dropped — confirm this is intended.
}
etl++;
}
return msgids;
return list;
}

View File

@ -40,6 +40,6 @@ void load_editor_translations(const String &p_locale);
void load_property_translations(const String &p_locale);
void load_doc_translations(const String &p_locale);
void load_extractable_translations(const String &p_locale);
// Returns the messages read from the built-in "source" extractable translation.
List<StringName> get_extractable_message_list();
// In the Vector<Vector<String>> form, each entry holds { msgid, msgctxt, msgid_plural }, in that order.
Vector<Vector<String>> get_extractable_message_list();
#endif // EDITOR_TRANSLATION_H

View File

@ -34,7 +34,6 @@
#include "core/error/error_macros.h"
#include "editor/editor_translation.h"
#include "editor/editor_translation_parser.h"
#include "plugins/packed_scene_translation_parser_plugin.h"
POTGenerator *POTGenerator::singleton = nullptr;
@ -66,8 +65,6 @@ void POTGenerator::generate_pot(const String &p_file) {
// Clear all_translation_strings of the previous round.
all_translation_strings.clear();
List<StringName> extractable_msgids = get_extractable_message_list();
// Collect all translatable strings according to files order in "POT Generation" setting.
for (int i = 0; i < files.size(); i++) {
Vector<String> msgids;
@ -92,8 +89,8 @@ void POTGenerator::generate_pot(const String &p_file) {
}
if (GLOBAL_GET("internationalization/locale/translation_add_builtin_strings_to_pot")) {
for (const StringName &extractable_msgid : extractable_msgids) {
_add_new_msgid(extractable_msgid, "", "", "");
for (const Vector<String> &extractable_msgids : get_extractable_message_list()) {
_add_new_msgid(extractable_msgids[0], extractable_msgids[1], extractable_msgids[2], "");
}
}