mirror of
https://github.com/godotengine/godot.git
synced 2024-11-10 06:03:09 +00:00
Merge pull request #95915 from BlueCube3310/betsy-bc1
Betsy: Add caching and BC1 compression support
This commit is contained in:
commit
67c9708eb8
@ -2751,6 +2751,19 @@ Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels
|
||||
|
||||
} break;
|
||||
|
||||
case COMPRESS_S3TC: {
|
||||
// BC3 is unsupported currently.
|
||||
if ((p_channels == USED_CHANNELS_RGB || p_channels == USED_CHANNELS_L) && _image_compress_bc_rd_func) {
|
||||
Error result = _image_compress_bc_rd_func(this, p_channels);
|
||||
|
||||
// If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme.
|
||||
if (result == OK) {
|
||||
return OK;
|
||||
}
|
||||
}
|
||||
|
||||
} break;
|
||||
|
||||
default: {
|
||||
}
|
||||
}
|
||||
@ -3138,6 +3151,7 @@ void (*Image::_image_compress_etc1_func)(Image *) = nullptr;
|
||||
void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr;
|
||||
void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr;
|
||||
Error (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr;
|
||||
Error (*Image::_image_compress_bc_rd_func)(Image *, Image::UsedChannels) = nullptr;
|
||||
void (*Image::_image_decompress_bc)(Image *) = nullptr;
|
||||
void (*Image::_image_decompress_bptc)(Image *) = nullptr;
|
||||
void (*Image::_image_decompress_etc1)(Image *) = nullptr;
|
||||
|
@ -160,6 +160,7 @@ public:
|
||||
static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format);
|
||||
|
||||
static Error (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels);
|
||||
static Error (*_image_compress_bc_rd_func)(Image *, UsedChannels p_channels);
|
||||
|
||||
static void (*_image_decompress_bc)(Image *);
|
||||
static void (*_image_decompress_bptc)(Image *);
|
||||
|
@ -2894,10 +2894,13 @@
|
||||
<member name="rendering/textures/lossless_compression/force_png" type="bool" setter="" getter="" default="false">
|
||||
If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP.
|
||||
</member>
|
||||
<member name="rendering/textures/vram_compression/cache_gpu_compressor" type="bool" setter="" getter="" default="true">
|
||||
If [code]true[/code], the GPU texture compressor will cache the local RenderingDevice and its resources (shaders and pipelines), allowing for faster subsequent imports at a memory cost.
|
||||
</member>
|
||||
<member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true">
|
||||
If [code]true[/code], the texture importer will utilize the GPU for compressing textures, which makes large textures import significantly faster.
|
||||
If [code]true[/code], the texture importer will utilize the GPU for compressing textures, improving the import time of large images.
|
||||
[b]Note:[/b] This setting requires either Vulkan or D3D12 available as a rendering backend.
|
||||
[b]Note:[/b] Currently this only affects BC6H compression, which is used on Desktop and Console for HDR images.
|
||||
[b]Note:[/b] Currently this only affects BC1 and BC6H compression, which are used on Desktop and Console for fully opaque and HDR images respectively.
|
||||
</member>
|
||||
<member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false">
|
||||
If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size).
|
||||
|
@ -4,6 +4,7 @@ Import("env_modules")
|
||||
|
||||
env_betsy = env_modules.Clone()
|
||||
env_betsy.GLSL_HEADER("bc6h.glsl")
|
||||
env_betsy.GLSL_HEADER("bc1.glsl")
|
||||
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])
|
||||
|
||||
# Thirdparty source files
|
||||
|
483
modules/betsy/bc1.glsl
Normal file
483
modules/betsy/bc1.glsl
Normal file
@ -0,0 +1,483 @@
|
||||
#[versions]
|
||||
|
||||
standard = "";
|
||||
dithered = "#define BC1_DITHER";
|
||||
|
||||
#[compute]
|
||||
#version 450
|
||||
|
||||
#include "CrossPlatformSettings_piece_all.glsl"
|
||||
#include "UavCrossPlatform_piece_all.glsl"
|
||||
|
||||
#define FLT_MAX 340282346638528859811704183484516925440.0f
|
||||
|
||||
layout(binding = 0) uniform sampler2D srcTex;
|
||||
layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
|
||||
|
||||
layout(std430, binding = 2) readonly restrict buffer globalBuffer {
|
||||
float2 c_oMatch5[256];
|
||||
float2 c_oMatch6[256];
|
||||
};
|
||||
|
||||
layout(push_constant, std430) uniform Params {
|
||||
uint p_numRefinements;
|
||||
uint p_padding[3];
|
||||
}
|
||||
params;
|
||||
|
||||
layout(local_size_x = 8, //
|
||||
local_size_y = 8, //
|
||||
local_size_z = 1) in;
|
||||
|
||||
// Expands an RGB565 color (carried in a float) into a float3 holding one
// whole-number component per channel in the 0..255 range.
float3 rgb565to888(float rgb565) {
	// Peel the three bit fields apart: red sits above bit 11 (/2048),
	// green occupies the 6 bits above bit 5 (/32) and blue is the low 5 bits.
	const float red5 = floor(rgb565 / 2048.0f);
	const float green6 = floor(mod(rgb565, 2048.0f) / 32.0f);
	const float blue5 = floor(mod(rgb565, 32.0f));

	// The mathematically exact expansion would be:
	//   rgb = floor( rgb * ( 255.0f / float3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
	//
	// stb_dxt instead scales with:
	//   rb = floor( rb * ( 256 / 32 + 8 / 32 ) );   // == 8.25
	//   g  = floor( g  * ( 256 / 64 + 4 / 64 ) );   // == 4.0625
	//
	// The reason is not certain. It may mirror how S3TC decoders are specified
	// (see http://www.ludicon.com/castano/blog/2009/03/gpu-dxt-decompression/),
	// or it may simply be cheap to express with integer shifts. The two forms
	// differ by at most 1 for a handful of values, and for an
	// 888 -> 565 -> 888 round trip the difference may not matter at all.
	// stb_dxt's variant is used here to stay consistent with it.
	const float3 stbScale = float3(8.25f, 4.0625f, 8.25f);
	return floor(float3(red5, green6, blue5) * stbScale);
}
|
||||
|
||||
// Quantizes a 0..255 RGB color to RGB565 and returns the packed value as a
// float (red * 2048 + green * 32 + blue). Each channel is rounded to nearest.
float rgb888to565(float3 rgbValue) {
	const float red5 = floor(rgbValue.r * 31.0f / 255.0f + 0.5f);
	const float green6 = floor(rgbValue.g * 63.0f / 255.0f + 0.5f);
	const float blue5 = floor(rgbValue.b * 31.0f / 255.0f + 0.5f);

	return red5 * 2048.0f + green6 * 32.0f + blue5;
}
|
||||
|
||||
// Returns the color one third of the way from `a` to `b`.
// STB_DXT_USE_ROUNDING_BIAS selects which of stb_dxt's two rounding modes is used.
float3 lerp13(float3 a, float3 b) {
#ifdef STB_DXT_USE_ROUNDING_BIAS
	// Biased variant: round the step to nearest before adding it to `a`.
	const float3 delta = b - a;
	return a + floor(delta * (1.0f / 3.0f) + 0.5f);
#else
	// Unbiased variant: weighted sum, rounded down.
	const float3 weighted = 2.0f * a + b;
	return floor(weighted / 3.0f);
#endif
}
|
||||
|
||||
/// Builds the 4-entry palette of a block from its two RGB565 endpoints.
/// colors[0] and colors[1] are the expanded endpoints c0 and c1; colors[2] and
/// colors[3] are the two colors interpolated between them.
void EvalColors(out float3 colors[4], float c0, float c1) {
	const float3 endpointA = rgb565to888(c0);
	const float3 endpointB = rgb565to888(c1);

	colors[0] = endpointA;
	colors[1] = endpointB;
	colors[2] = lerp13(endpointA, endpointB);
	colors[3] = lerp13(endpointB, endpointA);
}
|
||||
|
||||
/** Picks the two block endpoints. (Clever code, part 1)
	Builds the covariance matrix of the 16 colors, estimates its principal axis
	with a few power-iteration steps and returns the two source colors that lie
	furthest apart along that axis.
@param srcPixelsBlock
	The 16 source pixels, packed with packUnorm4x8 (only .xyz is read).
@param outMinEndp16 [out]
	Minimum endpoint, in RGB565
@param outMaxEndp16 [out]
	Maximum endpoint, in RGB565
*/
void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) {
	// Sum, per-channel minimum and per-channel maximum, still in [0, 1] units.
	const float3 firstColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz;
	float3 colorSum = firstColor;
	float3 lowest = firstColor;
	float3 highest = firstColor;

	for (int px = 1; px < 16; ++px) {
		const float3 color = unpackUnorm4x8(srcPixelsBlock[px]).xyz;
		colorSum += color;
		lowest = min(lowest, color);
		highest = max(highest, color);
	}

	// Move to the 0..255 range; the mean is rounded to a whole number.
	const float3 mean = round(colorSum * 255.0f / 16.0f);
	highest *= 255.0f;
	lowest *= 255.0f;

	// Upper triangle of the (symmetric) covariance matrix.
	float covRR = 0.0f;
	float covRG = 0.0f;
	float covRB = 0.0f;
	float covGG = 0.0f;
	float covGB = 0.0f;
	float covBB = 0.0f;

	for (int px = 0; px < 16; ++px) {
		const float3 color = unpackUnorm4x8(srcPixelsBlock[px]).xyz * 255.0f;
		const float3 offset = color - mean;

		covRR += offset.r * offset.r;
		covRG += offset.r * offset.g;
		covRB += offset.r * offset.b;
		covGG += offset.g * offset.g;
		covGB += offset.g * offset.b;
		covBB += offset.b * offset.b;
	}

	// Scale the matrix down before running the power iteration.
	covRR /= 255.0f;
	covRG /= 255.0f;
	covRB /= 255.0f;
	covGG /= 255.0f;
	covGB /= 255.0f;
	covBB /= 255.0f;

	// Power iteration, seeded with the bounding-box diagonal.
	float3 axis = highest - lowest;

	const int nIterPower = 4;
	for (int iter = 0; iter < nIterPower; ++iter) {
		// All three components are computed from the previous `axis` before it is replaced.
		axis = float3(
				axis.r * covRR + axis.g * covRG + axis.b * covRB,
				axis.r * covRG + axis.g * covGG + axis.b * covGB,
				axis.r * covRB + axis.g * covGB + axis.b * covBB);
	}

	const float magnitude = max3(abs(axis.r), abs(axis.g), abs(axis.b));

	float3 direction;
	if (magnitude < 4.0f) {
		// Axis too short to be meaningful: fall back to luminance
		// (JPEG YCbCr luma coefficients, scaled by 1000).
		direction = float3(299.0f, 587.0f, 114.0f);
	} else {
		direction = trunc(axis * (512.0f / magnitude));
	}

	// Pick the source colors with the smallest and largest projection on the axis.
	float3 lowEndpoint, highEndpoint;
	float lowDot = FLT_MAX;
	float highDot = -FLT_MAX;

	for (int px = 0; px < 16; ++px) {
		const float3 color = unpackUnorm4x8(srcPixelsBlock[px]).xyz * 255.0f;
		const float projection = dot(color, direction);

		if (projection < lowDot) {
			lowDot = projection;
			lowEndpoint = color;
		}

		if (projection > highDot) {
			highDot = projection;
			highEndpoint = color;
		}
	}

	outMinEndp16 = rgb888to565(lowEndpoint);
	outMaxEndp16 = rgb888to565(highEndpoint);
}
|
||||
|
||||
// Maps a value projected onto the endpoint axis to one of the four palette
// indices, using the three crossover points computed by MatchColorsBlock().
// Along the axis the palette entries are ordered 1 < 3 < 2 < 0.
uint SelectPaletteIndex(float projected, float c0Point, float halfPoint, float c3Point) {
	if (projected < halfPoint) {
		return (projected < c0Point) ? 1u : 3u;
	}
	return (projected < c3Point) ? 2u : 0u;
}

// The color matching function.
// Assigns each of the 16 source pixels the index of a palette entry.
//
// srcPixelsBlock: the 16 pixels of the block, packed with packUnorm4x8 (only .xyz is read).
// color:          the palette built by EvalColors().
//
// Returns the 32-bit index mask, 2 bits per pixel, pixel 0 in the lowest bits.
uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
	uint mask = 0u;
	float3 dir = color[0] - color[1];
	float stops[4];

	for (int i = 0; i < 4; ++i)
		stops[i] = dot(color[i], dir);

	// think of the colors as arranged on a line; project point onto that line, then choose
	// next color out of available ones. we compute the crossover points for "best color in top
	// half"/"best in bottom half" and then the same inside that subinterval.
	//
	// relying on this 1d approximation isn't always optimal in terms of euclidean distance,
	// but it's very close and a lot faster.
	// http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

	float c0Point = trunc((stops[1] + stops[3]) * 0.5f);
	float halfPoint = trunc((stops[3] + stops[2]) * 0.5f);
	float c3Point = trunc((stops[2] + stops[0]) * 0.5f);

#ifndef BC1_DITHER
	// the version without dithering is straightforward.
	// Pixels are visited last-to-first so that pixel 0 ends up in the lowest bits.
	for (uint i = 16u; i-- > 0u;) {
		const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
		const float dotValue = dot(currColor, dir);

		mask <<= 2u;
		mask |= SelectPaletteIndex(dotValue, c0Point, halfPoint, c3Point);
	}
#else
	// with floyd-steinberg dithering.
	// ep1 collects the quantization error of the current row, ep2 holds the
	// error of the previous row. Errors are kept at 1x scale while the
	// comparison runs at 16x scale, hence the scaled thresholds below.
	float4 ep1 = float4(0, 0, 0, 0);
	float4 ep2 = float4(0, 0, 0, 0);

	c0Point *= 16.0f;
	halfPoint *= 16.0f;
	c3Point *= 16.0f;

	for (uint y = 0u; y < 4u; ++y) {
		// Project the four pixels of this row up front. Reading them through a
		// single indexed loop guarantees each column uses its own pixel: the
		// previous hand-unrolled code read column 2 twice and never column 3.
		float projected[4];
		for (uint x = 0u; x < 4u; ++x) {
			const float3 currColor = unpackUnorm4x8(srcPixelsBlock[y * 4u + x]).xyz * 255.0f;
			projected[x] = dot(currColor, dir);
		}

		float ditherDot;
		uint idx;
		uint lmask;

		// Column 0: no left neighbour.
		ditherDot = (projected[0] * 16.0f) + (3.0f * ep2[1] + 5.0f * ep2[0]);
		idx = SelectPaletteIndex(ditherDot, c0Point, halfPoint, c3Point);
		ep1[0] = projected[0] - stops[idx];
		lmask = idx;

		// Column 1.
		ditherDot = (projected[1] * 16.0f) + (7.0f * ep1[0] + 3.0f * ep2[2] + 5.0f * ep2[1] + ep2[0]);
		idx = SelectPaletteIndex(ditherDot, c0Point, halfPoint, c3Point);
		ep1[1] = projected[1] - stops[idx];
		lmask |= idx << 2u;

		// Column 2.
		ditherDot = (projected[2] * 16.0f) + (7.0f * ep1[1] + 3.0f * ep2[3] + 5.0f * ep2[2] + ep2[1]);
		idx = SelectPaletteIndex(ditherDot, c0Point, halfPoint, c3Point);
		ep1[2] = projected[2] - stops[idx];
		lmask |= idx << 4u;

		// Column 3: no upper-right neighbour.
		ditherDot = (projected[3] * 16.0f) + (7.0f * ep1[2] + 5.0f * ep2[3] + ep2[2]);
		idx = SelectPaletteIndex(ditherDot, c0Point, halfPoint, c3Point);
		ep1[3] = projected[3] - stops[idx];
		lmask |= idx << 6u;

		mask |= lmask << (y * 8u);

		// swap: the row just processed becomes the "previous row".
		{
			float4 tmp = ep1;
			ep1 = ep2;
			ep2 = tmp;
		}
	}
#endif

	return mask;
}
|
||||
|
||||
// The refinement function. (Clever code, part 2)
// Tries to optimize colors to suit block contents better.
// (By solving a least squares system via normal equations+Cramer's rule)
//
// srcPixelsBlock: the 16 pixels of the block, packed with packUnorm4x8 (only .xyz is read).
// mask:           current index mask, 2 bits per pixel, pixel 0 in the lowest bits.
// inOutMinEndp16 / inOutMaxEndp16: RGB565 endpoints; overwritten with the refined values.
//
// Returns true when at least one endpoint changed.
bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinEndp16,
		inout float inOutMaxEndp16) {
	float newMin16, newMax16;
	const float oldMin = inOutMinEndp16;
	const float oldMax = inOutMaxEndp16;

	if ((mask ^ (mask << 2u)) < 4u) // all pixels have the same index?
	{
		// yes, linear system would be singular; solve using optimal
		// single-color match on average color.
		// The 8/255 seed makes the floor() below round the average to nearest.
		float3 rgbVal = float3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
		for (int i = 0; i < 16; ++i)
			rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz;

		rgbVal = floor(rgbVal * (255.0f / 16.0f));

		// Element [0] of each table entry feeds the max endpoint, element [1] the min endpoint.
		newMax16 = c_oMatch5[uint(rgbVal.r)][0] * 2048.0f + //
				c_oMatch6[uint(rgbVal.g)][0] * 32.0f + //
				c_oMatch5[uint(rgbVal.b)][0];
		newMin16 = c_oMatch5[uint(rgbVal.r)][1] * 2048.0f + //
				c_oMatch6[uint(rgbVal.g)][1] * 32.0f + //
				c_oMatch5[uint(rgbVal.b)][1];
	} else {
		const float w1Tab[4] = { 3, 0, 2, 1 };
		const float prods[4] = { 589824.0f, 2304.0f, 262402.0f, 66562.0f };
		// ^some magic to save a lot of multiplies in the accumulating loop...
		// (precomputed products of weights for least squares system, accumulated inside one 32-bit
		// register)
		// Each entry is (w1*w1 << 16) | (w2*w2 << 8) | (w1*w2), with w2 = 3 - w1:
		//   589824 = 9 << 16, 2304 = 9 << 8,
		//   262402 = (4 << 16) | (1 << 8) | 2, 66562 = (1 << 16) | (4 << 8) | 2.

		float akku = 0.0f;
		uint cm = mask;
		float3 at1 = float3(0, 0, 0);
		float3 at2 = float3(0, 0, 0);
		for (int i = 0; i < 16; ++i, cm >>= 2u) {
			const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;

			const uint step = cm & 3u;
			const float w1 = w1Tab[step];
			akku += prods[step];
			at1 += currColor * w1;
			at2 += currColor;
		}

		at2 = 3.0f * at2 - at1;

		// extract solutions and decide solvability.
		// The three sums are packed at bit offsets 16, 8 and 0, so they must be
		// split with radix 65536 and 256. (The previous 65535 only produced the
		// right answer because the sums stay small here.)
		const float xx = floor(akku / 65536.0f);
		const float yy = floor(mod(akku, 65536.0f) / 256.0f);
		const float xy = mod(akku, 256.0f);

		// Common scale factor: .x for the 5-bit red/blue channels, .y for 6-bit green.
		float2 f_rb_g;
		f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
		f_rb_g.y = f_rb_g.x * 63.0f / 31.0f;

		// solve.
		const float3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
				float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
		newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z;

		const float3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
				float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
		newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z;
	}

	inOutMinEndp16 = newMin16;
	inOutMaxEndp16 = newMax16;

	return oldMin != newMin16 || oldMax != newMax16;
}
|
||||
|
||||
#ifdef BC1_DITHER
|
||||
/// Snaps a full-range 888 color to the nearest value representable in 565.
/// The input is clamped to 0..255, quantized to 565 and then expanded back to
/// 888 with the same scale factors rgb565to888() uses.
float3 quant(float3 srcValue) {
	const float3 scaleTo565 = float3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f);
	const float3 scaleTo888 = float3(8.25f, 4.0625f, 8.25f);

	const float3 clamped = clamp(srcValue, 0.0f, 255.0f);
	// 888 -> 565, rounded to nearest.
	const float3 quantized565 = floor(clamped * scaleTo565 + 0.5f);
	// 565 -> 888 again.
	return floor(quantized565 * scaleTo888);
}
|
||||
|
||||
/// Produces a dithered copy of a 4x4 block.
/// Every pixel is quantized with quant() after adding the error diffused from
/// its already-processed neighbours (weights 7, 3, 5 and 1, divided by 16).
/// srcPixBlck and dthPixBlck hold packUnorm4x8 colors; the output alpha is 1.
void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) {
	// rowErr: error of the row being processed. prevErr: error of the row above.
	float3 rowErr[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
	float3 prevErr[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };

	for (uint row = 0u; row < 4u; ++row) {
		const uint base = row * 4u;
		float3 original;
		float3 dithered;

		// Column 0: no left neighbour.
		original = unpackUnorm4x8(srcPixBlck[base + 0u]).xyz * 255.0f;
		dithered = quant(original + trunc((3.0f * prevErr[1] + 5.0f * prevErr[0]) * (1.0f / 16.0f)));
		rowErr[0] = original - dithered;
		dthPixBlck[base + 0u] = packUnorm4x8(float4(dithered * (1.0f / 255.0f), 1.0f));

		// Column 1.
		original = unpackUnorm4x8(srcPixBlck[base + 1u]).xyz * 255.0f;
		dithered = quant(
				original + trunc((7.0f * rowErr[0] + 3.0f * prevErr[2] + 5.0f * prevErr[1] + prevErr[0]) * (1.0f / 16.0f)));
		rowErr[1] = original - dithered;
		dthPixBlck[base + 1u] = packUnorm4x8(float4(dithered * (1.0f / 255.0f), 1.0f));

		// Column 2.
		original = unpackUnorm4x8(srcPixBlck[base + 2u]).xyz * 255.0f;
		dithered = quant(
				original + trunc((7.0f * rowErr[1] + 3.0f * prevErr[3] + 5.0f * prevErr[2] + prevErr[1]) * (1.0f / 16.0f)));
		rowErr[2] = original - dithered;
		dthPixBlck[base + 2u] = packUnorm4x8(float4(dithered * (1.0f / 255.0f), 1.0f));

		// Column 3: no upper-right neighbour.
		original = unpackUnorm4x8(srcPixBlck[base + 3u]).xyz * 255.0f;
		dithered = quant(original + trunc((7.0f * rowErr[2] + 5.0f * prevErr[3] + prevErr[2]) * (1.0f / 16.0f)));
		rowErr[3] = original - dithered;
		dthPixBlck[base + 3u] = packUnorm4x8(float4(dithered * (1.0f / 255.0f), 1.0f));

		// Exchange the two error rows; the finished row becomes the "row above".
		float3 swapTmp[4] = rowErr;
		rowErr = prevErr;
		prevErr = swapTmp;
	}
}
|
||||
#endif
|
||||
|
||||
// Entry point: each invocation compresses one 4x4 pixel block of srcTex and
// stores the resulting 64-bit block in dstTexture at gl_GlobalInvocationID.xy.
void main() {
	uint srcPixelsBlock[16];

	bool bAllColorsEqual = true;

	// Fetch the 4x4 block, row by row, and note whether every pixel is identical.
	const uint2 blockOrigin = gl_GlobalInvocationID.xy << 2u;
	for (uint texel = 0u; texel < 16u; ++texel) {
		const uint2 texelCoord = blockOrigin + uint2(texel & 0x03u, texel >> 2u);
		const float3 texelRgb = OGRE_Load2D(srcTex, int2(texelCoord), 0).xyz;
		srcPixelsBlock[texel] = packUnorm4x8(float4(texelRgb, 1.0f));
		bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[texel];
	}

	float maxEndp16, minEndp16;
	uint mask = 0u;

	if (bAllColorsEqual) {
		// Single-color block: take both endpoints from the lookup tables and
		// point every pixel at palette index 2.
		const uint3 rgbVal = uint3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
		mask = 0xAAAAAAAAu;
		maxEndp16 =
				c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0];
		minEndp16 =
				c_oMatch5[rgbVal.r][1] * 2048.0f + c_oMatch6[rgbVal.g][1] * 32.0f + c_oMatch5[rgbVal.b][1];
	} else {
#ifdef BC1_DITHER
		uint ditherPixelsBlock[16];
		// first step: compute dithered version for PCA if desired
		DitherBlock(srcPixelsBlock, ditherPixelsBlock);
#else
#define ditherPixelsBlock srcPixelsBlock
#endif

		// second step: pca+map along principal axis
		OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16);
		if (minEndp16 != maxEndp16) {
			float3 palette[4];
			EvalColors(palette, maxEndp16, minEndp16); // Note min/max are inverted
			mask = MatchColorsBlock(srcPixelsBlock, palette);
		}

		// third step: refine (multiple times if requested).
		// Stops early once the endpoints collapse or the mask stops changing.
		for (uint pass = 0u; pass < params.p_numRefinements; ++pass) {
			const uint previousMask = mask;
			bool bEndpointsCollapsed = false;

			if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) {
				if (minEndp16 != maxEndp16) {
					float3 palette[4];
					EvalColors(palette, maxEndp16, minEndp16); // Note min/max are inverted
					mask = MatchColorsBlock(srcPixelsBlock, palette);
				} else {
					mask = 0u;
					bEndpointsCollapsed = true;
				}
			}

			if (bEndpointsCollapsed || mask == previousMask) {
				break;
			}
		}
	}

	// write the color block.
	// If the endpoints are out of order, exchange them and flip the low bit of
	// every 2-bit index to match.
	if (maxEndp16 < minEndp16) {
		const float swapTmp = minEndp16;
		minEndp16 = maxEndp16;
		maxEndp16 = swapTmp;
		mask ^= 0x55555555u;
	}

	// Low word: max endpoint in bits 0..15, min endpoint in bits 16..31. High word: indices.
	const uint2 outputBytes = uint2(uint(maxEndp16) | (uint(minEndp16) << 16u), mask);

	const uint2 dstUV = gl_GlobalInvocationID.xy;
	imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
}
|
1061
modules/betsy/betsy_bc1.h
Normal file
1061
modules/betsy/betsy_bc1.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -30,39 +30,17 @@
|
||||
|
||||
#include "image_compress_betsy.h"
|
||||
|
||||
#include "servers/rendering/rendering_device_binds.h"
|
||||
#include "servers/rendering/rendering_server_default.h"
|
||||
#include "core/config/project_settings.h"
|
||||
|
||||
#if defined(VULKAN_ENABLED)
|
||||
#include "drivers/vulkan/rendering_context_driver_vulkan.h"
|
||||
#endif
|
||||
#if defined(METAL_ENABLED)
|
||||
#include "drivers/metal/rendering_context_driver_metal.h"
|
||||
#endif
|
||||
#include "betsy_bc1.h"
|
||||
|
||||
#include "bc1.glsl.gen.h"
|
||||
#include "bc6h.glsl.gen.h"
|
||||
|
||||
// Push-constant payload for the BC6H compute shader.
// Two floats plus two 32-bit words of padding: 16 bytes in total.
struct BC6PushConstant {
	// NOTE(review): presumably the source image size in pixels — confirm against the shader.
	float sizeX;
	float sizeY;
	// Unused filler.
	uint32_t padding[2];
};
|
||||
|
||||
// Rounds `n` up to the nearest multiple of `m`.
// A value that is already a multiple of `m` is returned unchanged; the previous
// formula, n + (m - n % m), overshot by a full `m` in that case.
// `m` must be non-zero.
// NOTE(review): callers are outside this view — confirm none relied on the old
// result for exact multiples.
static int get_next_multiple(int n, int m) {
	const int remainder = n % m;
	return remainder == 0 ? n : n + (m - remainder);
}
|
||||
|
||||
Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
uint64_t start_time = OS::get_singleton()->get_ticks_msec();
|
||||
|
||||
if (r_img->is_compressed()) {
|
||||
return ERR_INVALID_DATA;
|
||||
}
|
||||
|
||||
ERR_FAIL_COND_V_MSG(r_img->get_format() < Image::FORMAT_RF || r_img->get_format() > Image::FORMAT_RGBE9995, ERR_INVALID_DATA, "Image is not an HDR image.");
|
||||
|
||||
Error err = OK;
|
||||
static Mutex betsy_mutex;
|
||||
static BetsyCompressor *betsy = nullptr;
|
||||
|
||||
void BetsyCompressor::_init() {
|
||||
// Create local RD.
|
||||
RenderingContextDriver *rcd = nullptr;
|
||||
RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
|
||||
@ -81,7 +59,7 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
#endif
|
||||
#endif
|
||||
if (rcd != nullptr && rd != nullptr) {
|
||||
err = rcd->initialize();
|
||||
Error err = rcd->initialize();
|
||||
if (err == OK) {
|
||||
err = rd->initialize(rcd);
|
||||
}
|
||||
@ -95,58 +73,201 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
}
|
||||
}
|
||||
|
||||
ERR_FAIL_NULL_V_MSG(rd, err, "Unable to create a local RenderingDevice.");
|
||||
ERR_FAIL_NULL_MSG(rd, "Unable to create a local RenderingDevice.");
|
||||
|
||||
Ref<RDShaderFile> compute_shader;
|
||||
compute_shader.instantiate();
|
||||
compress_rd = rd;
|
||||
compress_rcd = rcd;
|
||||
|
||||
// Create the sampler state.
|
||||
RD::SamplerState src_sampler_state;
|
||||
{
|
||||
src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
||||
src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
||||
src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
}
|
||||
|
||||
src_sampler = compress_rd->sampler_create(src_sampler_state);
|
||||
}
|
||||
|
||||
void BetsyCompressor::init() {
|
||||
WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->add_task(callable_mp(this, &BetsyCompressor::_thread_loop), true);
|
||||
command_queue.set_pump_task_id(tid);
|
||||
command_queue.push(this, &BetsyCompressor::_assign_mt_ids, tid);
|
||||
command_queue.push_and_sync(this, &BetsyCompressor::_init);
|
||||
DEV_ASSERT(task_id == tid);
|
||||
}
|
||||
|
||||
// Stores the id of the pump task; queued by init() through the command queue.
void BetsyCompressor::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) {
	this->task_id = p_pump_task_id;
}
|
||||
|
||||
// Yield thread to WTP so other tasks can be done on it.
|
||||
// Automatically regains control as soon a task is pushed to the command queue.
|
||||
void BetsyCompressor::_thread_loop() {
|
||||
while (!exit) {
|
||||
WorkerThreadPool::get_singleton()->yield();
|
||||
command_queue.flush_all();
|
||||
}
|
||||
}
|
||||
|
||||
void BetsyCompressor::_thread_exit() {
|
||||
exit = true;
|
||||
|
||||
if (compress_rd != nullptr) {
|
||||
if (dxt1_encoding_table_buffer.is_valid()) {
|
||||
compress_rd->free(dxt1_encoding_table_buffer);
|
||||
}
|
||||
|
||||
compress_rd->free(src_sampler);
|
||||
|
||||
// Clear the shader cache, pipelines will be unreferenced automatically.
|
||||
for (KeyValue<String, BetsyShader> &E : cached_shaders) {
|
||||
if (E.value.compiled.is_valid()) {
|
||||
compress_rd->free(E.value.compiled);
|
||||
}
|
||||
}
|
||||
cached_shaders.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void BetsyCompressor::finish() {
|
||||
command_queue.push(this, &BetsyCompressor::_thread_exit);
|
||||
if (task_id != WorkerThreadPool::INVALID_TASK_ID) {
|
||||
WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
|
||||
task_id = WorkerThreadPool::INVALID_TASK_ID;
|
||||
}
|
||||
|
||||
if (compress_rd != nullptr) {
|
||||
// Free the RD (and RCD if necessary).
|
||||
memdelete(compress_rd);
|
||||
compress_rd = nullptr;
|
||||
if (compress_rcd != nullptr) {
|
||||
memdelete(compress_rcd);
|
||||
compress_rcd = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions.
|
||||
|
||||
// Rounds `n` up to the nearest multiple of `m`.
// A value that is already a multiple of `m` is returned unchanged; the previous
// formula, n + (m - n % m), overshot by a full `m` in that case.
// `m` must be non-zero.
// NOTE(review): callers are outside this view — confirm none relied on the old
// result for exact multiples.
static int get_next_multiple(int n, int m) {
	const int remainder = n % m;
	return remainder == 0 ? n : n + (m - remainder);
}
|
||||
|
||||
// Returns the shader family name for a Betsy format.
// Formats that share a shader file map to the same name; unknown formats
// yield an empty string.
static String get_shader_name(BetsyFormat p_format) {
	if (p_format == BETSY_FORMAT_BC1 || p_format == BETSY_FORMAT_BC1_DITHER) {
		return "BC1";
	}

	if (p_format == BETSY_FORMAT_BC3) {
		return "BC3";
	}

	if (p_format == BETSY_FORMAT_BC6_SIGNED || p_format == BETSY_FORMAT_BC6_UNSIGNED) {
		return "BC6";
	}

	return "";
}
|
||||
|
||||
Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
|
||||
uint64_t start_time = OS::get_singleton()->get_ticks_msec();
|
||||
|
||||
if (r_img->is_compressed()) {
|
||||
return ERR_INVALID_DATA;
|
||||
}
|
||||
|
||||
Error err = OK;
|
||||
|
||||
// Destination format.
|
||||
Image::Format dest_format = Image::FORMAT_MAX;
|
||||
RD::DataFormat dst_rd_format = RD::DATA_FORMAT_MAX;
|
||||
|
||||
String version = "";
|
||||
|
||||
switch (p_format) {
|
||||
case BETSY_FORMAT_BC6: {
|
||||
err = compute_shader->parse_versions_from_text(bc6h_shader_glsl);
|
||||
case BETSY_FORMAT_BC1:
|
||||
version = "standard";
|
||||
dst_rd_format = RD::DATA_FORMAT_R32G32_UINT;
|
||||
dest_format = Image::FORMAT_DXT1;
|
||||
break;
|
||||
|
||||
if (r_img->detect_signed(true)) {
|
||||
dest_format = Image::FORMAT_BPTC_RGBF;
|
||||
version = "signed";
|
||||
} else {
|
||||
dest_format = Image::FORMAT_BPTC_RGBFU;
|
||||
version = "unsigned";
|
||||
}
|
||||
case BETSY_FORMAT_BC1_DITHER:
|
||||
version = "dithered";
|
||||
dst_rd_format = RD::DATA_FORMAT_R32G32_UINT;
|
||||
dest_format = Image::FORMAT_DXT1;
|
||||
break;
|
||||
|
||||
} break;
|
||||
case BETSY_FORMAT_BC6_SIGNED:
|
||||
version = "signed";
|
||||
dst_rd_format = RD::DATA_FORMAT_R32G32B32A32_UINT;
|
||||
dest_format = Image::FORMAT_BPTC_RGBF;
|
||||
break;
|
||||
|
||||
case BETSY_FORMAT_BC6_UNSIGNED:
|
||||
version = "unsigned";
|
||||
dst_rd_format = RD::DATA_FORMAT_R32G32B32A32_UINT;
|
||||
dest_format = Image::FORMAT_BPTC_RGBFU;
|
||||
break;
|
||||
|
||||
default:
|
||||
err = ERR_INVALID_PARAMETER;
|
||||
break;
|
||||
}
|
||||
|
||||
if (err != OK) {
|
||||
compute_shader->print_errors("Betsy compress shader");
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
const String shader_name = get_shader_name(p_format) + "-" + version;
|
||||
BetsyShader shader;
|
||||
|
||||
if (cached_shaders.has(shader_name)) {
|
||||
shader = cached_shaders[shader_name];
|
||||
|
||||
} else {
|
||||
Ref<RDShaderFile> source;
|
||||
source.instantiate();
|
||||
|
||||
switch (p_format) {
|
||||
case BETSY_FORMAT_BC1:
|
||||
case BETSY_FORMAT_BC1_DITHER:
|
||||
err = source->parse_versions_from_text(bc1_shader_glsl);
|
||||
break;
|
||||
|
||||
case BETSY_FORMAT_BC6_UNSIGNED:
|
||||
case BETSY_FORMAT_BC6_SIGNED:
|
||||
err = source->parse_versions_from_text(bc6h_shader_glsl);
|
||||
break;
|
||||
|
||||
default:
|
||||
err = ERR_INVALID_PARAMETER;
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// Compile the shader, return early if invalid.
|
||||
RID shader = rd->shader_create_from_spirv(compute_shader->get_spirv_stages(version));
|
||||
|
||||
if (shader.is_null()) {
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
if (err != OK) {
|
||||
source->print_errors("Betsy compress shader");
|
||||
return err;
|
||||
}
|
||||
|
||||
return err;
|
||||
// Compile the shader, return early if invalid.
|
||||
shader.compiled = compress_rd->shader_create_from_spirv(source->get_spirv_stages(version));
|
||||
if (shader.compiled.is_null()) {
|
||||
return ERR_CANT_CREATE;
|
||||
}
|
||||
|
||||
// Compile the pipeline, return early if invalid.
|
||||
shader.pipeline = compress_rd->compute_pipeline_create(shader.compiled);
|
||||
if (shader.pipeline.is_null()) {
|
||||
return ERR_CANT_CREATE;
|
||||
}
|
||||
|
||||
cached_shaders[shader_name] = shader;
|
||||
}
|
||||
|
||||
RID pipeline = rd->compute_pipeline_create(shader);
|
||||
if (shader.compiled.is_null() || shader.pipeline.is_null()) {
|
||||
return ERR_INVALID_DATA;
|
||||
}
|
||||
|
||||
// src_texture format information.
|
||||
RD::TextureFormat src_texture_format;
|
||||
@ -159,6 +280,33 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
}
|
||||
|
||||
switch (r_img->get_format()) {
|
||||
case Image::FORMAT_L8:
|
||||
r_img->convert(Image::FORMAT_RGBA8);
|
||||
src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_LA8:
|
||||
r_img->convert(Image::FORMAT_RGBA8);
|
||||
src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_R8:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R8_UNORM;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RG8:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R8G8_UNORM;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGB8:
|
||||
r_img->convert(Image::FORMAT_RGBA8);
|
||||
src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RGBA8:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
|
||||
case Image::FORMAT_RH:
|
||||
src_texture_format.format = RD::DATA_FORMAT_R16_SFLOAT;
|
||||
break;
|
||||
@ -198,33 +346,23 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
break;
|
||||
|
||||
default: {
|
||||
rd->free(shader);
|
||||
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the sampler state.
|
||||
RD::SamplerState src_sampler_state;
|
||||
{
|
||||
src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
||||
src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
|
||||
src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
|
||||
}
|
||||
|
||||
RID src_sampler = rd->sampler_create(src_sampler_state);
|
||||
|
||||
// For the destination format just copy the source format and change the usage bits.
|
||||
RD::TextureFormat dst_texture_format = src_texture_format;
|
||||
dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
|
||||
dst_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
|
||||
dst_texture_format.format = dst_rd_format;
|
||||
|
||||
// Encoding table setup.
|
||||
if (dest_format == Image::FORMAT_DXT1 && dxt1_encoding_table_buffer.is_null()) {
|
||||
Vector<uint8_t> data;
|
||||
data.resize(1024 * 4);
|
||||
memcpy(data.ptrw(), dxt1_encoding_table, 1024 * 4);
|
||||
|
||||
dxt1_encoding_table_buffer = compress_rd->storage_buffer_create(1024 * 4, data);
|
||||
}
|
||||
|
||||
const int mip_count = r_img->get_mipmap_count() + 1;
|
||||
|
||||
@ -256,8 +394,41 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size);
|
||||
|
||||
// Create the textures on the GPU.
|
||||
RID src_texture = rd->texture_create(src_texture_format, RD::TextureView(), src_images);
|
||||
RID dst_texture = rd->texture_create(dst_texture_format, RD::TextureView());
|
||||
RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
|
||||
RID dst_texture = compress_rd->texture_create(dst_texture_format, RD::TextureView());
|
||||
|
||||
Vector<RD::Uniform> uniforms;
|
||||
{
|
||||
{
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
||||
u.binding = 0;
|
||||
u.append_id(src_sampler);
|
||||
u.append_id(src_texture);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
{
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
|
||||
u.binding = 1;
|
||||
u.append_id(dst_texture);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
|
||||
if (dest_format == Image::FORMAT_DXT1) {
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
|
||||
u.binding = 2;
|
||||
u.append_id(dxt1_encoding_table_buffer);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
}
|
||||
|
||||
RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0);
|
||||
RD::ComputeListID compute_list = compress_rd->compute_list_begin();
|
||||
|
||||
compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline);
|
||||
compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
|
||||
|
||||
if (dest_format == Image::FORMAT_BPTC_RGBFU || dest_format == Image::FORMAT_BPTC_RGBF) {
|
||||
BC6PushConstant push_constant;
|
||||
@ -266,47 +437,33 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
push_constant.padding[0] = 0;
|
||||
push_constant.padding[1] = 0;
|
||||
|
||||
Vector<RD::Uniform> uniforms;
|
||||
{
|
||||
{
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
|
||||
u.binding = 0;
|
||||
u.append_id(src_sampler);
|
||||
u.append_id(src_texture);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
{
|
||||
RD::Uniform u;
|
||||
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
|
||||
u.binding = 1;
|
||||
u.append_id(dst_texture);
|
||||
uniforms.push_back(u);
|
||||
}
|
||||
}
|
||||
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
|
||||
|
||||
RID uniform_set = rd->uniform_set_create(uniforms, shader, 0);
|
||||
RD::ComputeListID compute_list = rd->compute_list_begin();
|
||||
} else {
|
||||
BC1PushConstant push_constant;
|
||||
push_constant.num_refines = 2;
|
||||
push_constant.padding[0] = 0;
|
||||
push_constant.padding[1] = 0;
|
||||
push_constant.padding[2] = 0;
|
||||
|
||||
rd->compute_list_bind_compute_pipeline(compute_list, pipeline);
|
||||
rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
|
||||
rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
|
||||
rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
|
||||
rd->compute_list_end();
|
||||
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant));
|
||||
}
|
||||
|
||||
rd->submit();
|
||||
rd->sync();
|
||||
compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
|
||||
compress_rd->compute_list_end();
|
||||
|
||||
compress_rd->submit();
|
||||
compress_rd->sync();
|
||||
|
||||
// Copy data from the GPU to the buffer.
|
||||
const Vector<uint8_t> texture_data = rd->texture_get_data(dst_texture, 0);
|
||||
const Vector<uint8_t> texture_data = compress_rd->texture_get_data(dst_texture, 0);
|
||||
int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i);
|
||||
|
||||
memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
|
||||
|
||||
// Free the source and dest texture.
|
||||
rd->free(dst_texture);
|
||||
rd->free(src_texture);
|
||||
compress_rd->free(dst_texture);
|
||||
compress_rd->free(src_texture);
|
||||
}
|
||||
|
||||
src_images.clear();
|
||||
@ -314,26 +471,67 @@ Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
|
||||
// Set the compressed data to the image.
|
||||
r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data);
|
||||
|
||||
// Free the shader (dependencies will be cleared automatically).
|
||||
rd->free(src_sampler);
|
||||
rd->free(shader);
|
||||
|
||||
memdelete(rd);
|
||||
if (rcd != nullptr) {
|
||||
memdelete(rcd);
|
||||
}
|
||||
|
||||
print_verbose(vformat("Betsy: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
void ensure_betsy_exists() {
|
||||
betsy_mutex.lock();
|
||||
if (betsy == nullptr) {
|
||||
betsy = memnew(BetsyCompressor);
|
||||
betsy->init();
|
||||
}
|
||||
betsy_mutex.unlock();
|
||||
}
|
||||
|
||||
// Compresses `r_img` to BC6H on the GPU through the shared Betsy compressor.
//
// Only images whose format lies in [FORMAT_RF, FORMAT_RGBE9995] are handled;
// the signed or unsigned BC6 variant is chosen from r_img->detect_signed().
// `p_channels` is part of the callback signature but is not read here.
//
// Returns the result of BetsyCompressor::compress(), or ERR_UNAVAILABLE when
// the image format is outside the supported range.
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
	ensure_betsy_exists();

	Image::Format format = r_img->get_format();
	Error result = ERR_UNAVAILABLE;

	if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {
		// No early return here: the signed/unsigned selection below and the
		// cache-release check after it must stay reachable.
		if (r_img->detect_signed()) {
			result = betsy->compress(BETSY_FORMAT_BC6_SIGNED, r_img);
		} else {
			result = betsy->compress(BETSY_FORMAT_BC6_UNSIGNED, r_img);
		}
	}

	// When caching is disabled the compressor is torn down after every request.
	if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
		free_device();
	}

	return result;
}
|
||||
|
||||
// Compresses `r_img` to BC1 on the GPU through the shared Betsy compressor.
//
// USED_CHANNELS_RGB selects the dithered BC1 encoder and USED_CHANNELS_L the
// plain BC1 encoder. Any other channel layout is not compressed and yields
// ERR_UNAVAILABLE.
Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) {
	ensure_betsy_exists();

	Error result = ERR_UNAVAILABLE;
	if (p_channels == Image::USED_CHANNELS_RGB) {
		result = betsy->compress(BETSY_FORMAT_BC1_DITHER, r_img);
	} else if (p_channels == Image::USED_CHANNELS_L) {
		result = betsy->compress(BETSY_FORMAT_BC1, r_img);
	}

	// When caching is disabled the compressor is torn down after every request.
	const bool keep_compressor = GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor");
	if (!keep_compressor) {
		free_device();
	}

	return result;
}
|
||||
|
||||
void free_device() {
|
||||
if (betsy != nullptr) {
|
||||
betsy->finish();
|
||||
memdelete(betsy);
|
||||
}
|
||||
}
|
||||
|
@ -32,13 +32,79 @@
|
||||
#define IMAGE_COMPRESS_BETSY_H
|
||||
|
||||
#include "core/io/image.h"
|
||||
#include "core/object/worker_thread_pool.h"
|
||||
#include "core/os/thread.h"
|
||||
#include "core/templates/command_queue_mt.h"
|
||||
|
||||
#include "servers/rendering/rendering_device_binds.h"
|
||||
#include "servers/rendering/rendering_server_default.h"
|
||||
|
||||
#if defined(VULKAN_ENABLED)
|
||||
#include "drivers/vulkan/rendering_context_driver_vulkan.h"
|
||||
#endif
|
||||
#if defined(METAL_ENABLED)
|
||||
#include "drivers/metal/rendering_context_driver_metal.h"
|
||||
#endif
|
||||
|
||||
// Encoder variants that can be requested from the Betsy compressor.
// Enumerator order is significant: the implicit values follow declaration order.
enum BetsyFormat {
	BETSY_FORMAT_BC6,
	BETSY_FORMAT_BC1, // Plain BC1; requested for luminance-only images.
	BETSY_FORMAT_BC1_DITHER, // Dithered BC1; requested for RGB images.
	BETSY_FORMAT_BC3,
	BETSY_FORMAT_BC6_SIGNED, // BC6H for images where detect_signed() is true.
	BETSY_FORMAT_BC6_UNSIGNED, // BC6H for images where detect_signed() is false.
};
|
||||
|
||||
Error _compress_betsy(BetsyFormat p_format, Image *r_img);
|
||||
// Push-constant block handed to the compute list when encoding to the BPTC
// float formats. It is uploaded verbatim using sizeof(BC6PushConstant), so the
// member order and sizes must not change.
struct BC6PushConstant {
	float sizeX;
	float sizeY;
	uint32_t padding[2]; // Zeroed by the caller before upload.
};
|
||||
|
||||
// Push-constant block handed to the compute list for every destination format
// other than the BPTC float ones. It is uploaded verbatim using
// sizeof(BC1PushConstant), so the member order and sizes must not change.
struct BC1PushConstant {
	uint32_t num_refines;
	uint32_t padding[3]; // Zeroed by the caller before upload.
};
|
||||
|
||||
void free_device();
|
||||
|
||||
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);
|
||||
Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels);
|
||||
|
||||
class BetsyCompressor : public Object {
|
||||
mutable CommandQueueMT command_queue;
|
||||
bool exit = false;
|
||||
WorkerThreadPool::TaskID task_id = WorkerThreadPool::INVALID_TASK_ID;
|
||||
|
||||
struct BetsyShader {
|
||||
RID compiled;
|
||||
RID pipeline;
|
||||
};
|
||||
|
||||
// Resources shared by all compression formats.
|
||||
RenderingDevice *compress_rd = nullptr;
|
||||
RenderingContextDriver *compress_rcd = nullptr;
|
||||
HashMap<String, BetsyShader> cached_shaders;
|
||||
RID src_sampler = RID();
|
||||
|
||||
// Format-specific resources.
|
||||
RID dxt1_encoding_table_buffer = RID();
|
||||
|
||||
void _init();
|
||||
void _assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id);
|
||||
void _thread_loop();
|
||||
void _thread_exit();
|
||||
|
||||
Error _compress(BetsyFormat p_format, Image *r_img);
|
||||
|
||||
public:
|
||||
void init();
|
||||
void finish();
|
||||
|
||||
Error compress(BetsyFormat p_format, Image *r_img) {
|
||||
Error err;
|
||||
command_queue.push_and_ret(this, &BetsyCompressor::_compress, p_format, r_img, &err);
|
||||
return err;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // IMAGE_COMPRESS_BETSY_H
|
||||
|
@ -38,10 +38,13 @@ void initialize_betsy_module(ModuleInitializationLevel p_level) {
|
||||
}
|
||||
|
||||
Image::_image_compress_bptc_rd_func = _betsy_compress_bptc;
|
||||
Image::_image_compress_bc_rd_func = _betsy_compress_s3tc;
|
||||
}
|
||||
|
||||
// Module teardown hook: releases the shared Betsy compressor when the scene
// initialization level is being torn down; every other level is ignored.
void uninitialize_betsy_module(ModuleInitializationLevel p_level) {
	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
		free_device();
	}
}
|
||||
|
@ -3528,6 +3528,7 @@ void RenderingServer::init() {
|
||||
GLOBAL_DEF_RST("rendering/textures/vram_compression/import_s3tc_bptc", false);
|
||||
GLOBAL_DEF_RST("rendering/textures/vram_compression/import_etc2_astc", false);
|
||||
GLOBAL_DEF("rendering/textures/vram_compression/compress_with_gpu", true);
|
||||
GLOBAL_DEF("rendering/textures/vram_compression/cache_gpu_compressor", true);
|
||||
|
||||
GLOBAL_DEF("rendering/textures/lossless_compression/force_png", false);
|
||||
|
||||
|
2
thirdparty/README.md
vendored
2
thirdparty/README.md
vendored
@ -78,7 +78,7 @@ fix build with our own copy of zstd (patch in `patches`).
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
||||
- `bc6h.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`.
|
||||
- `bc6h.glsl`, `bc1.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`.
|
||||
- `LICENSE.md`
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user