From 72e5f8c31e9237a4581cf250aa9943a4290f4017 Mon Sep 17 00:00:00 2001 From: George Marques Date: Fri, 26 Jan 2024 14:49:31 -0300 Subject: [PATCH] GDScript: Enable compression on export Besides the regular option to export GDScript as binary tokens, this also includes a compression option on top of it. The binary format needs to encode some information which generally makes it bigger than the source text. This option reduces that difference by using Zstandard compression on the buffer. --- editor/export/editor_export.cpp | 2 +- editor/export/editor_export_preset.h | 3 +- editor/export/project_export.cpp | 2 + modules/gdscript/gdscript.cpp | 2 +- modules/gdscript/gdscript_parser.cpp | 3 + modules/gdscript/gdscript_tokenizer.cpp | 2 +- .../gdscript/gdscript_tokenizer_buffer.cpp | 106 ++++++++++++------ modules/gdscript/gdscript_tokenizer_buffer.h | 8 +- modules/gdscript/register_types.cpp | 5 +- .../gdscript/tests/gdscript_test_runner.cpp | 2 +- modules/gdscript/tests/test_gdscript.cpp | 2 +- 11 files changed, 93 insertions(+), 44 deletions(-) diff --git a/editor/export/editor_export.cpp b/editor/export/editor_export.cpp index cd7e813dbdb..aeb49661691 100644 --- a/editor/export/editor_export.cpp +++ b/editor/export/editor_export.cpp @@ -270,7 +270,7 @@ void EditorExport::load_config() { preset->set_include_filter(config->get_value(section, "include_filter")); preset->set_exclude_filter(config->get_value(section, "exclude_filter")); preset->set_export_path(config->get_value(section, "export_path", "")); - preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS)); + preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED)); if (config->has_section_key(section, "encrypt_pck")) { preset->set_enc_pck(config->get_value(section, "encrypt_pck")); diff --git a/editor/export/editor_export_preset.h b/editor/export/editor_export_preset.h index c5f2a0ee79a..effce48111a 100644 --- a/editor/export/editor_export_preset.h +++ b/editor/export/editor_export_preset.h @@ -57,6 +57,7 @@ public: enum ScriptExportMode { MODE_SCRIPT_TEXT, MODE_SCRIPT_BINARY_TOKENS, + MODE_SCRIPT_BINARY_TOKENS_COMPRESSED, }; private: @@ -89,7 +90,7 @@ private: bool enc_directory = false; String script_key; - int script_mode = MODE_SCRIPT_BINARY_TOKENS; + int script_mode = MODE_SCRIPT_BINARY_TOKENS_COMPRESSED; protected: bool _set(const StringName &p_name, const Variant &p_value); diff --git a/editor/export/project_export.cpp b/editor/export/project_export.cpp index dba524310ea..ff1fa3470e2 100644 --- a/editor/export/project_export.cpp +++ b/editor/export/project_export.cpp @@ -1398,7 +1398,9 @@ ProjectExportDialog::ProjectExportDialog() { script_vb->add_margin_child(TTR("GDScript Export Mode:"), script_mode); script_mode->add_item(TTR("Text (easier debugging)"), (int)EditorExportPreset::MODE_SCRIPT_TEXT); script_mode->add_item(TTR("Binary tokens (faster loading)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS); + script_mode->add_item(TTR("Compressed binary tokens (smaller files)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED); script_mode->connect("item_selected", callable_mp(this, &ProjectExportDialog::_script_export_mode_changed)); + sections->add_child(script_vb); sections->connect("tab_changed", callable_mp(this, &ProjectExportDialog::_tab_changed)); diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp index 551adcb3202..7a576d52925 100644 --- a/modules/gdscript/gdscript.cpp +++ b/modules/gdscript/gdscript.cpp @@ -1066,7 +1066,7 @@ const Vector &GDScript::get_binary_tokens_source() const { Vector GDScript::get_as_binary_tokens() const { GDScriptTokenizerBuffer tokenizer; - return tokenizer.parse_code_string(source); + return tokenizer.parse_code_string(source, GDScriptTokenizerBuffer::COMPRESS_NONE); } const HashMap &GDScript::debug_get_member_functions() const { diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index a0036d38d6d..3ba6e4d1607 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -365,6 +365,7 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_ pop_multiline(); memdelete(text_tokenizer); + tokenizer = nullptr; #ifdef DEBUG_ENABLED if (multiline_stack.size() > 0) { @@ -384,6 +385,7 @@ Error GDScriptParser::parse_binary(const Vector &p_binary, const String Error err = buffer_tokenizer->set_code_buffer(p_binary); if (err) { + memdelete(buffer_tokenizer); return err; } @@ -404,6 +406,7 @@ Error GDScriptParser::parse_binary(const Vector &p_binary, const String pop_multiline(); memdelete(buffer_tokenizer); + tokenizer = nullptr; if (errors.is_empty()) { return OK; diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index a4425a2bf00..2940af585dc 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -284,7 +284,7 @@ void GDScriptTokenizerText::push_expression_indented_block() { } void GDScriptTokenizerText::pop_expression_indented_block() { - ERR_FAIL_COND(indent_stack_stack.size() == 0); + ERR_FAIL_COND(indent_stack_stack.is_empty()); indent_stack = indent_stack_stack.back()->get(); indent_stack_stack.pop_back(); } diff --git a/modules/gdscript/gdscript_tokenizer_buffer.cpp b/modules/gdscript/gdscript_tokenizer_buffer.cpp index 5b41c411d84..db523ea9419 100644 --- a/modules/gdscript/gdscript_tokenizer_buffer.cpp +++ b/modules/gdscript/gdscript_tokenizer_buffer.cpp @@ -30,6 +30,7 @@ #include "gdscript_tokenizer_buffer.h" +#include "core/io/compression.h" #include "core/io/marshalls.h" #define TOKENIZER_VERSION 100 @@ -139,19 +140,31 @@ GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t Error GDScriptTokenizerBuffer::set_code_buffer(const Vector &p_buffer) { const uint8_t *buf = p_buffer.ptr(); - int total_len = p_buffer.size(); - ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA); + ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA); int version = decode_uint32(&buf[4]); ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version."); - uint32_t identifier_count = decode_uint32(&buf[8]); - uint32_t constant_count = decode_uint32(&buf[12]); - uint32_t token_line_count = decode_uint32(&buf[16]); - uint32_t token_count = decode_uint32(&buf[20]); + int decompressed_size = decode_uint32(&buf[8]); - const uint8_t *b = &buf[24]; - total_len -= 24; + Vector contents; + if (decompressed_size == 0) { + contents = p_buffer.slice(12); + } else { + contents.resize(decompressed_size); + int result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD); + ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer."); + } + + int total_len = contents.size(); + buf = contents.ptr(); + uint32_t identifier_count = decode_uint32(&buf[0]); + uint32_t constant_count = decode_uint32(&buf[4]); + uint32_t token_line_count = decode_uint32(&buf[8]); + uint32_t token_count = decode_uint32(&buf[16]); + + const uint8_t *b = &buf[20]; + total_len -= 20; identifiers.resize(identifier_count); for (uint32_t i = 0; i < identifier_count; i++) { @@ -226,9 +239,7 @@ Error GDScriptTokenizerBuffer::set_code_buffer(const Vector &p_buffer) return OK; } -Vector GDScriptTokenizerBuffer::parse_code_string(const String &p_code) { - Vector buf; - +Vector GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) { HashMap identifier_map; HashMap constant_map; Vector token_buffer; @@ -280,28 +291,23 @@ Vector GDScriptTokenizerBuffer::parse_code_string(const String &p_code) } } - // Save header. - buf.resize(24); - buf.write[0] = 'G'; - buf.write[1] = 'D'; - buf.write[2] = 'S'; - buf.write[3] = 'C'; - encode_uint32(TOKENIZER_VERSION, &buf.write[4]); - encode_uint32(identifier_map.size(), &buf.write[8]); - encode_uint32(constant_map.size(), &buf.write[12]); - encode_uint32(token_lines.size(), &buf.write[16]); - encode_uint32(token_counter, &buf.write[20]); + Vector contents; + contents.resize(20); + encode_uint32(identifier_map.size(), &contents.write[0]); + encode_uint32(constant_map.size(), &contents.write[4]); + encode_uint32(token_lines.size(), &contents.write[8]); + encode_uint32(token_counter, &contents.write[16]); - int buf_pos = 24; + int buf_pos = 20; // Save identifiers. for (const StringName &id : rev_identifier_map) { String s = id.operator String(); int len = s.length(); - buf.resize(buf_pos + (len + 1) * 4); + contents.resize(buf_pos + (len + 1) * 4); - encode_uint32(len, &buf.write[buf_pos]); + encode_uint32(len, &contents.write[buf_pos]); buf_pos += 4; for (int i = 0; i < len; i++) { @@ -309,7 +315,7 @@ Vector GDScriptTokenizerBuffer::parse_code_string(const String &p_code) encode_uint32(s[i], tmp); for (int b = 0; b < 4; b++) { - buf.write[buf_pos + b] = tmp[b] ^ 0xb6; + contents.write[buf_pos + b] = tmp[b] ^ 0xb6; } buf_pos += 4; @@ -322,28 +328,58 @@ Vector GDScriptTokenizerBuffer::parse_code_string(const String &p_code) // Objects cannot be constant, never encode objects. Error err = encode_variant(v, nullptr, len, false); ERR_FAIL_COND_V_MSG(err != OK, Vector(), "Error when trying to encode Variant."); - buf.resize(buf_pos + len); - encode_variant(v, &buf.write[buf_pos], len, false); + contents.resize(buf_pos + len); + encode_variant(v, &contents.write[buf_pos], len, false); buf_pos += len; } // Save lines and columns. - buf.resize(buf_pos + token_lines.size() * 16); + contents.resize(buf_pos + token_lines.size() * 16); for (const KeyValue &e : token_lines) { - encode_uint32(e.key, &buf.write[buf_pos]); + encode_uint32(e.key, &contents.write[buf_pos]); buf_pos += 4; - encode_uint32(e.value, &buf.write[buf_pos]); + encode_uint32(e.value, &contents.write[buf_pos]); buf_pos += 4; } for (const KeyValue &e : token_columns) { - encode_uint32(e.key, &buf.write[buf_pos]); + encode_uint32(e.key, &contents.write[buf_pos]); buf_pos += 4; - encode_uint32(e.value, &buf.write[buf_pos]); + encode_uint32(e.value, &contents.write[buf_pos]); buf_pos += 4; } // Store tokens. - buf.append_array(token_buffer); + contents.append_array(token_buffer); + + Vector buf; + + // Save header. + buf.resize(12); + buf.write[0] = 'G'; + buf.write[1] = 'D'; + buf.write[2] = 'S'; + buf.write[3] = 'C'; + encode_uint32(TOKENIZER_VERSION, &buf.write[4]); + + switch (p_compress_mode) { + case COMPRESS_NONE: + encode_uint32(0u, &buf.write[8]); + buf.append_array(contents); + break; + + case COMPRESS_ZSTD: { + encode_uint32(contents.size(), &buf.write[8]); + Vector compressed; + int max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD); + compressed.resize(max_size); + + int compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD); + ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector(), "Error compressing GDScript tokenizer buffer."); + compressed.resize(compressed_size); + + buf.append_array(compressed); + } break; + } return buf; } @@ -372,7 +408,7 @@ void GDScriptTokenizerBuffer::push_expression_indented_block() { } void GDScriptTokenizerBuffer::pop_expression_indented_block() { - ERR_FAIL_COND(indent_stack_stack.size() == 0); + ERR_FAIL_COND(indent_stack_stack.is_empty()); indent_stack = indent_stack_stack.back()->get(); indent_stack_stack.pop_back(); } diff --git a/modules/gdscript/gdscript_tokenizer_buffer.h b/modules/gdscript/gdscript_tokenizer_buffer.h index 192a7b3f159..55df66e50f7 100644 --- a/modules/gdscript/gdscript_tokenizer_buffer.h +++ b/modules/gdscript/gdscript_tokenizer_buffer.h @@ -34,6 +34,12 @@ #include "gdscript_tokenizer.h" class GDScriptTokenizerBuffer : public GDScriptTokenizer { +public: + enum CompressMode { + COMPRESS_NONE, + COMPRESS_ZSTD, + }; + enum { TOKEN_BYTE_MASK = 0x80, TOKEN_BITS = 8, @@ -64,7 +70,7 @@ class GDScriptTokenizerBuffer : public GDScriptTokenizer { public: Error set_code_buffer(const Vector &p_buffer); - static Vector parse_code_string(const String &p_code); + static Vector parse_code_string(const String &p_code, CompressMode p_compress_mode); virtual int get_cursor_line() const override; virtual int get_cursor_column() const override; diff --git a/modules/gdscript/register_types.cpp b/modules/gdscript/register_types.cpp index e835c93b7c0..5ff1c78ac97 100644 --- a/modules/gdscript/register_types.cpp +++ b/modules/gdscript/register_types.cpp @@ -84,7 +84,7 @@ class EditorExportGDScript : public EditorExportPlugin { public: virtual void _export_file(const String &p_path, const String &p_type, const HashSet &p_features) override { - int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS; + int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED; const Ref &preset = get_export_preset(); @@ -103,7 +103,8 @@ public: String source; source.parse_utf8(reinterpret_cast(file.ptr()), file.size()); - file = GDScriptTokenizerBuffer::parse_code_string(source); + GDScriptTokenizerBuffer::CompressMode compress_mode = script_mode == EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED ? GDScriptTokenizerBuffer::COMPRESS_ZSTD : GDScriptTokenizerBuffer::COMPRESS_NONE; + file = GDScriptTokenizerBuffer::parse_code_string(source, compress_mode); if (file.is_empty()) { return; } diff --git a/modules/gdscript/tests/gdscript_test_runner.cpp b/modules/gdscript/tests/gdscript_test_runner.cpp index 880289d2a80..a0329eb8d23 100644 --- a/modules/gdscript/tests/gdscript_test_runner.cpp +++ b/modules/gdscript/tests/gdscript_test_runner.cpp @@ -538,7 +538,7 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) { } else { String code = FileAccess::get_file_as_string(source_file, &err); if (!err) { - Vector buffer = GDScriptTokenizerBuffer::parse_code_string(code); + Vector buffer = GDScriptTokenizerBuffer::parse_code_string(code, GDScriptTokenizerBuffer::COMPRESS_ZSTD); script->set_binary_tokens_source(buffer); } } diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp index e4fab68e06c..f6965cf7cfb 100644 --- a/modules/gdscript/tests/test_gdscript.cpp +++ b/modules/gdscript/tests/test_gdscript.cpp @@ -111,7 +111,7 @@ static void test_tokenizer(const String &p_code, const Vector &p_lines) static void test_tokenizer_buffer(const Vector &p_buffer, const Vector &p_lines); static void test_tokenizer_buffer(const String &p_code, const Vector &p_lines) { - Vector binary = GDScriptTokenizerBuffer::parse_code_string(p_code); + Vector binary = GDScriptTokenizerBuffer::parse_code_string(p_code, GDScriptTokenizerBuffer::COMPRESS_NONE); test_tokenizer_buffer(binary, p_lines); }