From: Connor McAdams Subject: [PATCH v2 1/2] wined3d: Add decompression functions for DXT1, DXT3, and DXT5 Message-Id: <1529697060-6908-2-git-send-email-conmanx360@gmail.com> Date: Fri, 22 Jun 2018 15:50:59 -0400 In-Reply-To: <1529697060-6908-1-git-send-email-conmanx360@gmail.com> References: <1529697060-6908-1-git-send-email-conmanx360@gmail.com> This adds decompression functions for the DXT1, DXT3, and DXT5 formats, which covers all the possible DXT formats: DXT2 and DXT4 reuse the DXT3 and DXT5 decoders respectively. The functions are used to decompress DXTn volume textures and convert them to B8G8R8A8. This patch also adds a check to the wined3d_texture_upload_data function so that only 3D DXTn textures go through the decompression path; for any other DXTn texture the conversion callback is cleared and the data is not converted. Signed-off-by: Connor McAdams --- dlls/wined3d/texture.c | 26 ++++ dlls/wined3d/utils.c | 323 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 333 insertions(+), 16 deletions(-) diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c index c316906..a998b9c 100644 --- a/dlls/wined3d/texture.c +++ b/dlls/wined3d/texture.c @@ -1948,6 +1948,32 @@ void wined3d_texture_upload_data(struct wined3d_texture *texture, unsigned int s bo.addr += src_box->left * format->byte_count; } + if (format->id == WINED3DFMT_DXT5 || format->id == WINED3DFMT_DXT4 || + format->id == WINED3DFMT_DXT3 || format->id == WINED3DFMT_DXT2 || + format->id == WINED3DFMT_DXT1) + { + if (target == GL_TEXTURE_3D) + { + struct wined3d_format temp; + + f = *format; + temp.upload = f.upload; + format = &f; + format = wined3d_get_format(gl_info, WINED3DFMT_B8G8R8A8_UNORM, WINED3DUSAGE_TEXTURE); + f = *format; + f.upload = temp.upload; + f.conv_byte_count = 4; + format = &f; + texture->resource.format_flags &= ~WINED3DFMT_FLAG_BLOCKS; + } + else + { + f = *format; + f.upload = NULL; + format = &f; + } + } + if (format->upload) { unsigned int dst_row_pitch, dst_slice_pitch; diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 937c1bc..719f23e 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -840,6
+840,307 @@ static void convert_s8_uint_d24_float(const BYTE *src, BYTE *dst, UINT src_row_p } } +static void dxt5_decompress_block(const BYTE *src, BYTE *dst, UINT width, UINT height, UINT depth, + UINT x_pos, UINT y_pos, UINT z_pos, UINT dst_slice_pitch, UINT64 cur_block) +{ + UINT64 alpha_block, alpha_index, color_block, color_index; + const UINT64 *source; + DWORD *dest; + DWORD alpha_lookup; + DWORD bgra; + DWORD temp; + DWORD i, x, y; + WORD color[2]; + BYTE alpha_val, color_val; + BYTE alpha[8]; + BYTE r[4]; + BYTE g[4]; + BYTE b[4]; + + source = (const UINT64 *)(src + cur_block * 16); + alpha_block = source[0]; + color_block = source[1]; + + alpha[0] = alpha_block & 0xff; + alpha[1] = (alpha_block >> 8) & 0xff; + + if (alpha[0] > alpha[1]) + { + for (i = 0; i < 6; ++i) + alpha[2 + i] = (((6 - i) * alpha[0]) + ((1 + i) * alpha[1])) / 7; + } + else if (alpha[0] <= alpha[1]) + { + for (i = 0; i < 4; ++i) + alpha[2 + i] = (((4 - i) * alpha[0]) + ((1 + i) * alpha[1])) / 5; + alpha[6] = 0; + alpha[7] = 255; + } + + color[0] = color_block & 0xffff; + color[1] = (color_block >> 16) & 0xffff; + + for (i = 0; i < 2; ++i) + { + temp = (color[i] >> 11) * 255 + 16; + r[i] = (temp / 32 + temp) / 32; + temp = ((color[i] >> 5) & 0x3f) * 255 + 32; + g[i] = (temp / 64 + temp) / 64; + temp = (color[i] & 0x1f) * 255 + 16; + b[i] = (temp / 32 + temp) / 32; + } + + for (i = 0; i < 2; ++i) + { + r[2 + i] = (2 * r[0 + i] + r[1 - i]) / 3; + g[2 + i] = (2 * g[0 + i] + g[1 - i]) / 3; + b[2 + i] = (2 * b[0 + i] + b[1 - i]) / 3; + } + + color_index = (color_block >> 32) & 0xffffffff; + alpha_index = (alpha_block >> 16); + + dest = (DWORD *)(dst + z_pos * dst_slice_pitch); + + for (y = 0; y < 4; ++y) + { + if (y_pos + y >= height) + break; + for (x = 0; x < 4; ++x) + { + if (x_pos + x >= width) + break; + + color_val = 0; + alpha_val = 0; + bgra = 0; + + color_val = (color_index >> (y * 8)); + color_val = (color_val >> (x * 2)) & 0x3; + alpha_lookup = (alpha_index >> (y * 12)) & 0xfff; 
+ alpha_val = (alpha_lookup >> (x * 3)) & 0x7; + bgra = ((alpha[alpha_val] << 24) | (r[color_val] << 16) | (g[color_val] << 8) | b[color_val]); + dest[(y_pos + y) * width + (x_pos + x)] = bgra; + } + } +} + +static void convert_dxt5_b8g8r8a8_unorm(const BYTE *src, BYTE *dst, UINT src_row_pitch, UINT src_slice_pitch, + UINT dst_row_pitch, UINT dst_slice_pitch, UINT width, UINT height, UINT depth) +{ + UINT64 current_block; + DWORD x, y, z; + + current_block = 0; + + for (z = 0; z < depth; ++z) + { + for (y = 0; y < height; y += 4) + { + for (x = 0; x < width; x += 4) + { + dxt5_decompress_block(src, dst, width, height, depth, x, y, z, dst_slice_pitch, current_block); + current_block++; + } + } + } +} + +static void dxt3_decompress_block(const BYTE *src, BYTE *dst, UINT width, UINT height, UINT depth, + UINT x_pos, UINT y_pos, UINT z_pos, UINT dst_slice_pitch, UINT64 cur_block) +{ + UINT64 alpha_block, alpha_lookup, color_block, color_index; + const UINT64 *source; + DWORD *dest; + DWORD bgra; + DWORD temp; + DWORD i, x, y; + WORD color[2]; + BYTE alpha_val, color_val; + BYTE r[4]; + BYTE g[4]; + BYTE b[4]; + + source = (const UINT64 *)(src + cur_block * 16); + alpha_block = source[0]; + color_block = source[1]; + + color[0] = color_block & 0xffff; + color[1] = (color_block >> 16) & 0xffff; + + for (i = 0; i < 2; ++i) + { + temp = (color[i] >> 11) * 255 + 16; + r[i] = (temp / 32 + temp) / 32; + temp = ((color[i] >> 5) & 0x3f) * 255 + 32; + g[i] = (temp / 64 + temp) / 64; + temp = (color[i] & 0x1f) * 255 + 16; + b[i] = (temp / 32 + temp) / 32; + } + + for (i = 0; i < 2; ++i) + { + r[2 + i] = (2 * r[0 + i] + r[1 - i]) / 3; + g[2 + i] = (2 * g[0 + i] + g[1 - i]) / 3; + b[2 + i] = (2 * b[0 + i] + b[1 - i]) / 3; + } + + color_index = (color_block >> 32) & 0xffffffff; + dest = (DWORD *)(dst + z_pos * dst_slice_pitch); + + for (y = 0; y < 4; ++y) + { + if (y_pos + y >= height) + break; + for (x = 0; x < 4; ++x) + { + if (x_pos + x >= width) + break; + + color_val = 0; + 
alpha_val = 0; + bgra = 0; + + color_val = (color_index >> (y * 8)); + color_val = (color_val >> (x * 2)) & 0x3; + + alpha_lookup = (alpha_block >> (y * 16)) & 0xffff; + alpha_val = (alpha_lookup >> (x * 4)) & 0xf; + temp = alpha_val * 255 + 8; + alpha_val = (temp / 16 + temp) / 16; + + bgra = ((alpha_val << 24) | (r[color_val] << 16) | (g[color_val] << 8) | b[color_val]); + dest[(y_pos + y) * width + (x_pos + x)] = bgra; + } + } +} + +static void convert_dxt3_b8g8r8a8_unorm(const BYTE *src, BYTE *dst, UINT src_row_pitch, UINT src_slice_pitch, + UINT dst_row_pitch, UINT dst_slice_pitch, UINT width, UINT height, UINT depth) +{ + UINT64 current_block; + DWORD x, y, z; + + current_block = 0; + + for (z = 0; z < depth; ++z) + { + for (y = 0; y < height; y += 4) + { + for (x = 0; x < width; x += 4) + { + dxt3_decompress_block(src, dst, width, height, depth, x, y, z, dst_slice_pitch, current_block); + current_block++; + } + } + } +} + +static void dxt1_decompress_block(const BYTE *src, BYTE *dst, UINT width, UINT height, UINT depth, + UINT x_pos, UINT y_pos, UINT z_pos, UINT dst_slice_pitch, UINT64 cur_block) +{ + UINT64 color_block, color_index; + const UINT64 *source; + DWORD *dest; + DWORD bgra; + DWORD temp; + DWORD i, x, y; + WORD color[2]; + BYTE color_val; + BYTE alpha; + BYTE r[4]; + BYTE g[4]; + BYTE b[4]; + BYTE use_alpha; + + source = (const UINT64 *)(src + cur_block * 8); + color_block = source[0]; + + color[0] = color_block & 0xffff; + color[1] = (color_block >> 16) & 0xffff; + + for (i = 0; i < 2; ++i) + { + temp = (color[i] >> 11) * 255 + 16; + r[i] = (temp / 32 + temp) / 32; + temp = ((color[i] >> 5) & 0x3f) * 255 + 32; + g[i] = (temp / 64 + temp) / 64; + temp = (color[i] & 0x1f) * 255 + 16; + b[i] = (temp / 32 + temp) / 32; + } + + if (color[0] > color[1]) + { + for (i = 0; i < 2; ++i) + { + r[2 + i] = (2 * r[0 + i] + r[1 - i]) / 3; + g[2 + i] = (2 * g[0 + i] + g[1 - i]) / 3; + b[2 + i] = (2 * b[0 + i] + b[1 - i]) / 3; + } + use_alpha = 0; + } + else if 
(color[0] <= color[1]) + { + r[2] = (r[0] + r[1]) / 2; + g[2] = (g[0] + g[1]) / 2; + b[2] = (b[0] + b[1]) / 2; + + r[3] = 0; + g[3] = 0; + b[3] = 0; + + use_alpha = 1; + } + + color_index = (color_block >> 32) & 0xffffffff; + dest = (DWORD *)(dst + z_pos * dst_slice_pitch); + + for (y = 0; y < 4; ++y) + { + if (y_pos + y >= height) + break; + for (x = 0; x < 4; ++x) + { + if (x_pos + x >= width) + break; + + color_val = 0; + bgra = 0; + + color_val = (color_index >> (y * 8)); + color_val = (color_val >> (x * 2)) & 0x3; + if (color_val == 3 && use_alpha == 1) + alpha = 0; + else + alpha = 255; + + bgra = ((alpha << 24) | (r[color_val] << 16) | (g[color_val] << 8) | b[color_val]); + dest[(y_pos + y) * width + (x_pos + x)] = bgra; + } + } +} + +static void convert_dxt1_b8g8r8a8_unorm(const BYTE *src, BYTE *dst, UINT src_row_pitch, UINT src_slice_pitch, + UINT dst_row_pitch, UINT dst_slice_pitch, UINT width, UINT height, UINT depth) +{ + UINT64 current_block; + DWORD x, y, z; + + current_block = 0; + + for (z = 0; z < depth; ++z) + { + for (y = 0; y < height; y += 4) + { + for (x = 0; x < width; x += 4) + { + dxt1_decompress_block(src, dst, width, height, depth, x, y, z, dst_slice_pitch, current_block); + current_block++; + } + } + } +} + static void x8_d24_unorm_upload(const BYTE *src, BYTE *dst, unsigned int src_row_pitch, unsigned int src_slice_pitch, unsigned int dst_row_pitch, unsigned int dst_slice_pitch, @@ -1118,27 +1419,27 @@ static const struct wined3d_format_texture_info format_texture_info[] = GL_RGBA, GL_UNSIGNED_BYTE, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_SRGB_READ | WINED3DFMT_FLAG_COMPRESSED, - EXT_TEXTURE_COMPRESSION_S3TC, NULL}, + EXT_TEXTURE_COMPRESSION_S3TC, convert_dxt1_b8g8r8a8_unorm}, {WINED3DFMT_DXT2, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0, WINED3DFMT_FLAG_TEXTURE | 
WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_SRGB_READ | WINED3DFMT_FLAG_COMPRESSED, - EXT_TEXTURE_COMPRESSION_S3TC, NULL}, + EXT_TEXTURE_COMPRESSION_S3TC, convert_dxt3_b8g8r8a8_unorm}, {WINED3DFMT_DXT3, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_SRGB_READ | WINED3DFMT_FLAG_COMPRESSED, - EXT_TEXTURE_COMPRESSION_S3TC, NULL}, + EXT_TEXTURE_COMPRESSION_S3TC, convert_dxt3_b8g8r8a8_unorm}, {WINED3DFMT_DXT4, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_SRGB_READ | WINED3DFMT_FLAG_COMPRESSED, - EXT_TEXTURE_COMPRESSION_S3TC, NULL}, + EXT_TEXTURE_COMPRESSION_S3TC, convert_dxt5_b8g8r8a8_unorm}, {WINED3DFMT_DXT5, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_SRGB_READ | WINED3DFMT_FLAG_COMPRESSED, - EXT_TEXTURE_COMPRESSION_S3TC, NULL}, + EXT_TEXTURE_COMPRESSION_S3TC, convert_dxt5_b8g8r8a8_unorm}, {WINED3DFMT_BC1_UNORM, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING @@ -3394,17 +3695,7 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ * for dx9 GPUs support it, some do not, so not supporting DXTn volumes is OK for d3d9. * * Note that GL_NV_texture_compression_vtc adds this functionality to OpenGL, but the - * block layout is not compatible with the one used by d3d. See volume_dxt5_test. 
*/ - idx = get_format_idx(WINED3DFMT_DXT1); - gl_info->formats[idx].flags[WINED3D_GL_RES_TYPE_TEX_3D] &= ~WINED3DFMT_FLAG_TEXTURE; - idx = get_format_idx(WINED3DFMT_DXT2); - gl_info->formats[idx].flags[WINED3D_GL_RES_TYPE_TEX_3D] &= ~WINED3DFMT_FLAG_TEXTURE; - idx = get_format_idx(WINED3DFMT_DXT3); - gl_info->formats[idx].flags[WINED3D_GL_RES_TYPE_TEX_3D] &= ~WINED3DFMT_FLAG_TEXTURE; - idx = get_format_idx(WINED3DFMT_DXT4); - gl_info->formats[idx].flags[WINED3D_GL_RES_TYPE_TEX_3D] &= ~WINED3DFMT_FLAG_TEXTURE; - idx = get_format_idx(WINED3DFMT_DXT5); - gl_info->formats[idx].flags[WINED3D_GL_RES_TYPE_TEX_3D] &= ~WINED3DFMT_FLAG_TEXTURE; + * block layout is not compatible with the one used by d3d. See volume_dxtn_test. */ idx = get_format_idx(WINED3DFMT_BC1_UNORM); gl_info->formats[idx].flags[WINED3D_GL_RES_TYPE_TEX_3D] &= ~WINED3DFMT_FLAG_TEXTURE; idx = get_format_idx(WINED3DFMT_BC1_UNORM_SRGB); -- 2.7.4