From 7ceed18790365c6938ad958d747c1bed421510e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Tue, 14 May 2024 12:51:49 +0200 Subject: [PATCH] astcenc: Update to 4.8.0 https://github.com/ARM-software/astc-encoder/releases/tag/4.8.0 --- thirdparty/README.md | 2 +- thirdparty/astcenc/astcenc_entry.cpp | 2 +- .../astcenc/astcenc_integer_sequence.cpp | 8 ++--- thirdparty/astcenc/astcenc_internal.h | 32 +++++++++++-------- thirdparty/astcenc/astcenc_internal_entry.h | 1 + .../astcenc/astcenc_symbolic_physical.cpp | 21 ++++++------ .../astcenc/astcenc_vecmathlib_none_4.h | 17 +++++++--- 7 files changed, 49 insertions(+), 34 deletions(-) diff --git a/thirdparty/README.md b/thirdparty/README.md index ad12fbd7053..fd8fc8999e2 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -47,7 +47,7 @@ Files extracted from upstream source: ## astcenc - Upstream: https://github.com/ARM-software/astc-encoder -- Version: 4.7.0 (1a51f2915121275038677317c8bf61f1a78b590c, 2024) +- Version: 4.8.0 (0d6c9047c5ad19640e2d60fdb8f11a16675e7938, 2024) - License: Apache 2.0 Files extracted from upstream source: diff --git a/thirdparty/astcenc/astcenc_entry.cpp b/thirdparty/astcenc/astcenc_entry.cpp index 71efe9cec48..5dc38016065 100644 --- a/thirdparty/astcenc/astcenc_entry.cpp +++ b/thirdparty/astcenc/astcenc_entry.cpp @@ -1167,7 +1167,7 @@ astcenc_error astcenc_decompress_image( return ASTCENC_ERR_OUT_OF_MEM; } - image_block blk; + image_block blk {}; blk.texel_count = static_cast(block_x * block_y * block_z); // Decode mode inferred from the output data type diff --git a/thirdparty/astcenc/astcenc_integer_sequence.cpp b/thirdparty/astcenc/astcenc_integer_sequence.cpp index 416750374dc..41dc38b74e1 100644 --- a/thirdparty/astcenc/astcenc_integer_sequence.cpp +++ b/thirdparty/astcenc/astcenc_integer_sequence.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2021 Arm Limited +// Copyright 2011-2024 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -464,10 +464,10 @@ static inline void write_bits( } /** - * @brief Read up to 8 bits at an arbitrary bit offset. + * @brief Read up to 16 bits from two bytes. * - * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may - * span two separate bytes in memory. + * This function reads a packed N-bit field from two bytes in memory. The stored value must exist + * within the two bytes, but can start at an arbitary bit offset and span the two bytes in memory. * * @param bitcount The number of bits to read. * @param bitoffset The bit offset to read from, between 0 and 7. diff --git a/thirdparty/astcenc/astcenc_internal.h b/thirdparty/astcenc/astcenc_internal.h index 715028ac8f4..df6e07f9ee9 100644 --- a/thirdparty/astcenc/astcenc_internal.h +++ b/thirdparty/astcenc/astcenc_internal.h @@ -326,10 +326,10 @@ struct partition_info uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS]; /** @brief The partition of each texel in the block. */ - uint8_t partition_of_texel[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS uint8_t partition_of_texel[BLOCK_MAX_TEXELS]; /** @brief The list of texels in each partition. */ - uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS]; }; /** @@ -367,19 +367,19 @@ struct decimation_info * @brief The number of weights that contribute to each texel. * Value is between 1 and 4. */ - uint8_t texel_weight_count[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS uint8_t texel_weight_count[BLOCK_MAX_TEXELS]; /** * @brief The weight index of the N weights that are interpolated for each texel. * Stored transposed to improve vectorization. */ - uint8_t texel_weights_tr[4][BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS uint8_t texel_weights_tr[4][BLOCK_MAX_TEXELS]; /** * @brief The bilinear contribution of the N weights that are interpolated for each texel. * Value is between 0 and 16, stored transposed to improve vectorization. */ - uint8_t texel_weight_contribs_int_tr[4][BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS uint8_t texel_weight_contribs_int_tr[4][BLOCK_MAX_TEXELS]; /** * @brief The bilinear contribution of the N weights that are interpolated for each texel. @@ -388,13 +388,13 @@ struct decimation_info ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS]; /** @brief The number of texels that each stored weight contributes to. */ - uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS]; + ASTCENC_ALIGNAS uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS]; /** * @brief The list of texels that use a specific weight index. * Stored transposed to improve vectorization. */ - uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS]; + ASTCENC_ALIGNAS uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS]; /** * @brief The bilinear contribution to the N texels that use each weight. @@ -732,7 +732,11 @@ struct block_size_descriptor * * The @c data_[rgba] fields store the image data in an encoded SoA float form designed for easy * vectorization. Input data is converted to float and stored as values between 0 and 65535. LDR - * data is stored as direct UNORM data, HDR data is stored as LNS data. + * data is stored as direct UNORM data, HDR data is stored as LNS data. They are allocated SIMD + * elements over-size to allow vectorized stores of unaligned and partial SIMD lanes (e.g. in a + * 6x6x6 block the final row write will read elements 210-217 (vec8) or 214-217 (vec4), which is + * two elements above the last real data element). The overspill values are never written to memory, + * and would be benign, but the padding avoids hitting undefined behavior. * * The @c rgb_lns and @c alpha_lns fields that assigned a per-texel use of HDR are only used during * decompression. The current compressor will always use HDR endpoint formats when in HDR mode. @@ -740,16 +744,16 @@ struct block_size_descriptor struct image_block { /** @brief The input (compress) or output (decompress) data for the red color component. */ - ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1]; /** @brief The input (compress) or output (decompress) data for the green color component. */ - ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1]; /** @brief The input (compress) or output (decompress) data for the blue color component. */ - ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1]; /** @brief The input (compress) or output (decompress) data for the alpha color component. */ - ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS]; + ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1]; /** @brief The number of texels in the block. */ uint8_t texel_count; @@ -957,7 +961,7 @@ struct ASTCENC_ALIGNAS compression_working_buffers * * For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets. */ - uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS]; + ASTCENC_ALIGNAS uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS]; /** @brief Error of the best encoding combination for each block mode. */ ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES]; @@ -1111,7 +1115,7 @@ struct symbolic_compressed_block * * If dual plane, the second plane starts at @c weights[WEIGHTS_PLANE2_OFFSET]. */ - uint8_t weights[BLOCK_MAX_WEIGHTS]; + ASTCENC_ALIGNAS uint8_t weights[BLOCK_MAX_WEIGHTS]; /** * @brief Get the weight quantization used by this block mode. diff --git a/thirdparty/astcenc/astcenc_internal_entry.h b/thirdparty/astcenc/astcenc_internal_entry.h index 26677147810..c283c5acc68 100644 --- a/thirdparty/astcenc/astcenc_internal_entry.h +++ b/thirdparty/astcenc/astcenc_internal_entry.h @@ -150,6 +150,7 @@ public: m_start_count = 0; m_done_count = 0; m_task_count = 0; + m_callback = nullptr; m_callback_last_value = 0.0f; m_callback_min_diff = 1.0f; } diff --git a/thirdparty/astcenc/astcenc_symbolic_physical.cpp b/thirdparty/astcenc/astcenc_symbolic_physical.cpp index c4da678f1c3..45d9abb64ba 100644 --- a/thirdparty/astcenc/astcenc_symbolic_physical.cpp +++ b/thirdparty/astcenc/astcenc_symbolic_physical.cpp @@ -330,12 +330,14 @@ void physical_to_symbolic( return; } + // Low values span 3 bytes so need two read_bits calls int vx_low_s = read_bits(8, 12, pcb) | (read_bits(5, 12 + 8, pcb) << 8); - int vx_high_s = read_bits(8, 25, pcb) | (read_bits(5, 25 + 8, pcb) << 8); + int vx_high_s = read_bits(13, 25, pcb); int vx_low_t = read_bits(8, 38, pcb) | (read_bits(5, 38 + 8, pcb) << 8); - int vx_high_t = read_bits(8, 51, pcb) | (read_bits(5, 51 + 8, pcb) << 8); + int vx_high_t = read_bits(13, 51, pcb); - int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && vx_low_t == 0x1FFF && vx_high_t == 0x1FFF; + int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && + vx_low_t == 0x1FFF && vx_high_t == 0x1FFF; if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones) { @@ -350,12 +352,14 @@ void physical_to_symbolic( int vx_high_s = read_bits(9, 19, pcb); int vx_low_t = read_bits(9, 28, pcb); int vx_high_t = read_bits(9, 37, pcb); - int vx_low_p = read_bits(9, 46, pcb); - int vx_high_p = read_bits(9, 55, pcb); + int vx_low_r = read_bits(9, 46, pcb); + int vx_high_r = read_bits(9, 55, pcb); - int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && vx_low_t == 0x1FF && vx_high_t == 0x1FF && vx_low_p == 0x1FF && vx_high_p == 0x1FF; + int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && + vx_low_t == 0x1FF && vx_high_t == 0x1FF && + vx_low_r == 0x1FF && vx_high_r == 0x1FF; - if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones) + if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_r >= vx_high_r) && !all_ones) { scb.block_type = SYM_BTYPE_ERROR; return; @@ -470,8 +474,7 @@ void physical_to_symbolic( bitpos += 2; } } - scb.partition_index = static_cast(read_bits(6, 13, pcb) | - (read_bits(PARTITION_INDEX_BITS - 6, 19, pcb) << 6)); + scb.partition_index = static_cast(read_bits(10, 13, pcb)); } for (int i = 0; i < partition_count; i++) diff --git a/thirdparty/astcenc/astcenc_vecmathlib_none_4.h b/thirdparty/astcenc/astcenc_vecmathlib_none_4.h index 1c95c2ff88a..be7348eff1c 100644 --- a/thirdparty/astcenc/astcenc_vecmathlib_none_4.h +++ b/thirdparty/astcenc/astcenc_vecmathlib_none_4.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2019-2023 Arm Limited +// Copyright 2019-2024 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -556,10 +556,16 @@ ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) */ template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) { - return vint4(a.m[0] << s, - a.m[1] << s, - a.m[2] << s, - a.m[3] << s); + // Cast to unsigned to avoid shift in/out of sign bit undefined behavior + unsigned int as0 = static_cast(a.m[0]) << s; + unsigned int as1 = static_cast(a.m[1]) << s; + unsigned int as2 = static_cast(a.m[2]) << s; + unsigned int as3 = static_cast(a.m[3]) << s; + + return vint4(static_cast(as0), + static_cast(as1), + static_cast(as2), + static_cast(as3)); } /** @@ -567,6 +573,7 @@ template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) */ template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) { + // Cast to unsigned to avoid shift in/out of sign bit undefined behavior unsigned int as0 = static_cast(a.m[0]) >> s; unsigned int as1 = static_cast(a.m[1]) >> s; unsigned int as2 = static_cast(a.m[2]) >> s;