Optimize .hdr loading and RGB9E5 conversion

This commit is contained in:
BlueCube3310 2024-08-08 19:13:00 +02:00
parent 739019e4e4
commit 80cf6cbfe9
3 changed files with 55 additions and 31 deletions

View File

@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
} }
_FORCE_INLINE_ uint32_t to_rgbe9995() const {
	// Packs the r, g and b members into a single 32-bit shared-exponent
	// value: red in bits 0-8, green in bits 9-17, blue in bits 18-26 and a
	// 5-bit exponent in bits 27-31. Alpha is not read.
	//
	// Technique adapted from:
	// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp

	// Upper clamp for each channel: 0x1FF << 7 == 65408.
	static const float kMaxVal = float(0x1FF << 7);
	// Lower bound for the maximum channel: 1 / (1 << 16) == 2^-16.
	static const float kMinVal = float(1.f / (1 << 16));

	// Clamp every channel to [0, kMaxVal].
	const float _r = CLAMP(r, 0.0f, kMaxVal);
	const float _g = CLAMP(g, 0.0f, kMaxVal);
	const float _b = CLAMP(b, 0.0f, kMaxVal);

	// Largest channel, never smaller than kMinVal so the shared exponent
	// stays inside the 5-bit range.
	const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));

	// The integer view is unsigned on purpose: the shifts below push set
	// bits out of the top of the word and the final add wraps around. Both
	// are well-defined for uint32_t, whereas on int32_t they are undefined
	// behavior in C++17.
	union {
		float f;
		uint32_t i;
	} R, G, B, E;

	// Take the exponent of the maximum channel (rounding up the 9th bit) and
	// add 15 to it. When added to the channels, it causes the implicit '1.0'
	// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
	// of the mantissa, rounding the truncated bits.
	E.f = MaxChannel;
	E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa.
	E.i &= 0x7F800000; // Zero the mantissa.

	// This shifts the 9-bit values we need into the lowest bits, rounding as
	// needed. Note that if the channel has a smaller exponent than the max
	// channel, it will shift even more. This is intentional.
	R.f = _r + E.f;
	G.f = _g + E.f;
	B.f = _b + E.f;

	// Convert the bias to the correct exponent in the upper 5 bits. The
	// shift discards the top bits of the float exponent and the add wraps
	// modulo 2^32, leaving the 5-bit exponent in bits 27-31.
	E.i <<= 4;
	E.i += 0x10000000;

	// Combine the fields. RGB floats have unwanted data in the upper 9
	// bits. Only red needs to mask them off because green and blue shift
	// it out to the left.
	return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
}
_FORCE_INLINE_ Color blend(const Color &p_over) const { _FORCE_INLINE_ Color blend(const Color &p_over) const {

View File

@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
imgdata.resize(height * width * (int)sizeof(uint32_t)); imgdata.resize(height * width * (int)sizeof(uint32_t));
{ {
uint8_t *w = imgdata.ptrw(); uint8_t *ptr = imgdata.ptrw();
uint8_t *ptr = (uint8_t *)w; Vector<uint8_t> temp_read_data;
temp_read_data.resize(128);
uint8_t *temp_read_ptr = temp_read_data.ptrw();
if (width < 8 || width >= 32768) { if (width < 8 || width >= 32768) {
// Read flat data // Read flat data
@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
} }
} else { } else {
// Dump // Dump
f->get_buffer(temp_read_ptr, count);
for (int z = 0; z < count; ++z) { for (int z = 0; z < count; ++z) {
ptr[(j * width + i++) * 4 + k] = f->get_8(); ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z];
} }
} }
} }
@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
} }
} }
const bool force_linear = p_flags & FLAG_FORCE_LINEAR;
//convert //convert
for (int i = 0; i < width * height; i++) { for (int i = 0; i < width * height; i++) {
float exp = pow(2.0f, ptr[3] - 128.0f); int e = ptr[3] - 128;
Color c( if (force_linear || (e < -15 || e > 15)) {
ptr[0] * exp / 255.0, float exp = pow(2.0f, e);
ptr[1] * exp / 255.0, Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0);
ptr[2] * exp / 255.0);
if (p_flags & FLAG_FORCE_LINEAR) { if (force_linear) {
c = c.srgb_to_linear(); c = c.srgb_to_linear();
}
*(uint32_t *)ptr = c.to_rgbe9995();
} else {
// https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129
uint32_t e5 = (uint32_t)(e + 15);
*(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1));
} }
*(uint32_t *)ptr = c.to_rgbe9995();
ptr += 4; ptr += 4;
} }
} }

View File

@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader {
public: public:
virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale); virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
virtual void get_recognized_extensions(List<String> *p_extensions) const; virtual void get_recognized_extensions(List<String> *p_extensions) const;
ImageLoaderHDR(); ImageLoaderHDR();
}; };