From 5a7bddbfa604d252e1a69006ace30ec4c39f0497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 18 Aug 2022 11:44:20 +0200 Subject: [PATCH 01/13] Allow binding depth as 565 by going through depal. This is a partial fix for #6105 (Ratchet & Clank particles visible through things), but there's still weird glitchiness. There's a pass during rendering that scrambles the mini depth buffer by using a triangle mesh. I wonder if it's trying to simulate the swizzle? But it doesn't really look like it... --- GPU/Common/DepalettizeCommon.cpp | 7 +++--- GPU/Common/DepalettizeCommon.h | 6 ++--- GPU/Common/DepalettizeShaderCommon.cpp | 21 ++++++++++++---- GPU/Common/DepalettizeShaderCommon.h | 3 ++- GPU/Common/TextureCacheCommon.cpp | 35 +++++++++++++++++++++++--- unittest/TestShaderGenerators.cpp | 3 ++- 6 files changed, 59 insertions(+), 16 deletions(-) diff --git a/GPU/Common/DepalettizeCommon.cpp b/GPU/Common/DepalettizeCommon.cpp index 70214fc3d5ca..185eecdbc4b0 100644 --- a/GPU/Common/DepalettizeCommon.cpp +++ b/GPU/Common/DepalettizeCommon.cpp @@ -147,10 +147,10 @@ Draw::SamplerState *DepalShaderCache::GetSampler() { return nearestSampler_; } -DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat) { +DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GETextureFormat textureFormat, GEBufferFormat bufferFormat) { using namespace Draw; - u32 id = GenerateShaderID(clutMode, pixelFormat); + u32 id = GenerateShaderID(clutMode, textureFormat, bufferFormat); auto shader = cache_.find(id); if (shader != cache_.end()) { @@ -171,7 +171,8 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferF config.startPos = gstate.getClutIndexStartPos(); config.shift = gstate.getClutIndexShift(); config.mask = gstate.getClutIndexMask(); - config.pixelFormat = pixelFormat; + config.bufferFormat = bufferFormat; + config.textureFormat = textureFormat; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/DepalettizeCommon.h b/GPU/Common/DepalettizeCommon.h index c394f47eb7c3..fbac86300df7 100644 --- a/GPU/Common/DepalettizeCommon.h +++ b/GPU/Common/DepalettizeCommon.h @@ -49,7 +49,7 @@ class DepalShaderCache { ~DepalShaderCache(); // This also uploads the palette and binds the correct texture. - DepalShader *GetDepalettizeShader(uint32_t clutMode, GEBufferFormat pixelFormat); + DepalShader *GetDepalettizeShader(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat); Draw::Texture *GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut); Draw::SamplerState *GetSampler(); @@ -63,8 +63,8 @@ class DepalShaderCache { void DeviceRestore(Draw::DrawContext *draw); private: - static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) { - return (clutMode & 0xFFFFFF) | (pixelFormat << 24); + static uint32_t GenerateShaderID(uint32_t clutMode, GETextureFormat texFormat, GEBufferFormat pixelFormat) { + return (clutMode & 0xFFFFFF) | (pixelFormat << 24) | (texFormat << 28); } static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) { diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 9874e91d6e39..11c3c5039447 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -49,7 +49,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con const int shift = config.shift; const int mask = config.mask; - if (config.pixelFormat == GE_FORMAT_DEPTH16) { + if (config.bufferFormat == GE_FORMAT_DEPTH16) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_offset", factors.offset); @@ -71,7 +71,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); int shiftedMask = mask << shift; - switch (config.pixelFormat) { + switch (config.bufferFormat) { case GE_FORMAT_8888: if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n"); if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n"); @@ -102,6 +102,17 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, con case GE_FORMAT_DEPTH16: // Remap depth buffer. writer.C(" float depth = (color.x - z_offset) * z_scale;\n"); + + if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) { + // Convert depth to 565, without going through a CLUT. + writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n"); + writer.C(" float r = (idepth & 31) / 31.0f;\n"); + writer.C(" float g = ((idepth >> 5) & 63) / 63.0f;\n"); + writer.C(" float b = ((idepth >> 11) & 31) / 31.0f;\n"); + writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n"); + return; + } + writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n"); break; default: @@ -135,7 +146,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c const int shift = config.shift; const int mask = config.mask; - if (config.pixelFormat == GE_FORMAT_DEPTH16) { + if (config.bufferFormat == GE_FORMAT_DEPTH16) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.ConstFloat("z_scale", factors.scale); writer.ConstFloat("z_offset", factors.offset); @@ -144,7 +155,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c float index_multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. bool formatOK = true; - switch (config.pixelFormat) { + switch (config.bufferFormat) { case GE_FORMAT_8888: if ((mask & (mask + 1)) == 0) { // If the value has all bits contiguous (bitmask check above), we can mod by it + 1. @@ -249,7 +260,7 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c // index_multiplier -= 0.01f / texturePixels; if (!formatOK) { - ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.pixelFormat), shift, mask, config.startPos); + ERROR_LOG_REPORT_ONCE(depal, G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos); } // Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR. diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 5ce5ef88a84c..74bb38d19606 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -29,7 +29,8 @@ struct DepalConfig { int shift; u32 startPos; GEPaletteFormat clutFormat; - GEBufferFormat pixelFormat; + GETextureFormat textureFormat; + GEBufferFormat bufferFormat; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index af642a7df1dc..64a48537b638 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -525,6 +525,10 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { def.format = format; def.bufw = bufw; + if (texaddr == 0x04710000) { + texaddr = texaddr; + } + std::vector candidates = GetFramebufferCandidates(def, 0); if (candidates.size() > 0) { int index = GetBestCandidateIndex(candidates); @@ -892,6 +896,7 @@ bool TextureCacheCommon::MatchFramebuffer( const bool noOffset = texaddr == addr; const bool exactMatch = noOffset && entry.format < 4 && channel == RASTER_COLOR; + const u32 w = 1 << ((entry.dim >> 0) & 0xf); const u32 h = 1 << ((entry.dim >> 8) & 0xf); // 512 on a 272 framebuffer is sane, so let's be lenient. @@ -927,6 +932,7 @@ bool TextureCacheCommon::MatchFramebuffer( // Check works for D16 too (???) const bool matchingClutFormat = (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) || + (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) || (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || (fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16); @@ -971,7 +977,9 @@ bool TextureCacheCommon::MatchFramebuffer( // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. if (matchingClutFormat) { if (!noOffset) { - WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, matchInfo->xOffset, matchInfo->yOffset); + WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset); + } else { + WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s at %08x", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address); } return true; } else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) { @@ -1823,12 +1831,33 @@ void TextureCacheCommon::ApplyTexture() { gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); } +bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { + if (IsClutFormat(texFormat)) { + switch (bufferFormat) { + case GE_FORMAT_4444: + case GE_FORMAT_565: + case GE_FORMAT_5551: + case GE_FORMAT_DEPTH16: + return texFormat == GE_TFMT_CLUT16; + case GE_FORMAT_8888: + return texFormat == GE_TFMT_CLUT32; + } + WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat)); + return false; + } else if (texFormat == GE_TFMT_5650 && bufferFormat == GE_FORMAT_DEPTH16) { + // We can also "depal" 565 format, this is used to read depth buffers as 565 on occasion (#15491). + return true; + } else { + return false; + } +} + void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel) { DepalShader *depalShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; - bool need_depalettize = IsClutFormat(texFormat); bool depth = channel == RASTER_DEPTH; + bool need_depalettize = CanDepalettize(texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && !gstate_c.curTextureIs3D; // TODO: Implement shader depal in the fragment shader generator for D3D11 at least. @@ -1878,7 +1907,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer return; } - depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); + depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, texFormat, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat); gstate_c.SetUseShaderDepal(false); } diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 19b0b0d2e977..a96df6657ce3 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -309,7 +309,8 @@ bool TestDepalShaders() { config.shift = 8; config.startPos = 64; config.mask = 0xFF; - config.pixelFormat = GE_FORMAT_8888; + config.bufferFormat = GE_FORMAT_8888; + config.textureFormat = GE_TFMT_CLUT32; GenerateDepalFs(buffer, config, desc); if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) { From 346a252593c5d2d7bca4c3705ea86d5ac20be67f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 18 Aug 2022 12:08:03 +0200 Subject: [PATCH 02/13] Fix naming of temp fbos to be in PSP pixels --- GPU/Common/FramebufferManagerCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index a1164c686ffd..3e524da19fd1 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1941,7 +1941,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u bool z_stencil = reason == TempFBO::STENCIL; char name[128]; - snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : ""); + snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w / renderScaleFactor_, h / renderScaleFactor_, z_stencil ? "_depth" : ""); Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name }); if (!fbo) { return nullptr; From e6b60026d17b96484d78892d78ab23f9f6db4611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 18 Aug 2022 15:46:20 +0200 Subject: [PATCH 03/13] Fix graphics in Ratchet & Clank. WIP --- GPU/Common/DepalettizeCommon.cpp | 1 + GPU/Common/DepalettizeShaderCommon.h | 1 + GPU/Common/Draw2D.cpp | 36 +++++++++++++++++++++++++ GPU/Common/Draw2D.h | 2 ++ GPU/Common/FramebufferManagerCommon.cpp | 8 ++++-- GPU/Common/FramebufferManagerCommon.h | 1 + 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/GPU/Common/DepalettizeCommon.cpp b/GPU/Common/DepalettizeCommon.cpp index 185eecdbc4b0..4d395a2d4a85 100644 --- a/GPU/Common/DepalettizeCommon.cpp +++ b/GPU/Common/DepalettizeCommon.cpp @@ -173,6 +173,7 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GETexture config.mask = gstate.getClutIndexMask(); config.bufferFormat = bufferFormat; config.textureFormat = textureFormat; + config.resolutionScale = 3; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index 74bb38d19606..c4577be92044 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -31,6 +31,7 @@ struct DepalConfig { GEPaletteFormat clutFormat; GETextureFormat textureFormat; GEBufferFormat bufferFormat; + int resolutionScale; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index a6b4abb68c01..89292adfaf83 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -73,6 +73,29 @@ RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { return RASTER_DEPTH; } +// ugly way to get the scale into the function +static float g_scale; + +RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { + writer.DeclareSamplers(samplers); + writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_WRITEDEPTH); + writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); + // Unlike when just copying a depth buffer, here we're generating new depth values so we'll + // have to apply the scaling. + DepthScaleFactors factors = GetDepthScaleFactors(); + writer.C(" vec2 tsize = vec2(textureSize(tex, 0));\n"); + writer.C(" vec2 coord = v_texcoord * tsize;\n"); + writer.F(" float strip = 4.0 * %f;\n", g_scale); + writer.C(" float in_strip = mod(coord.y, strip);\n"); + writer.C(" coord.y = coord.y - in_strip + strip - in_strip;\n"); + writer.C(" coord /= tsize;\n"); + writer.C(" vec3 rgb = ").SampleTexture2D("tex", "coord").C(".xyz;\n"); + writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n"); + writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset); + writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); + return RASTER_DEPTH; +} + void GenerateDraw2DVS(ShaderWriter &writer) { writer.BeginVSMain(inputs, Slice::empty(), varyings); @@ -212,6 +235,19 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } draw_->BindPipeline(draw2DPipeline565ToDepth_); break; + + case DRAW2D_565_TO_DEPTH_DESWIZZLE: + if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) { + // Can't do it + return; + } + if (!draw2DPipeline565ToDepthDeswizzle_) { + g_scale = renderScaleFactor_; + draw2DPipeline565ToDepthDeswizzle_ = Create2DPipeline(&GenerateDraw2D565ToDepthDeswizzleFs); + linearFilter = false; + } + draw_->BindPipeline(draw2DPipeline565ToDepthDeswizzle_); + break; } if (tex) { diff --git a/GPU/Common/Draw2D.h b/GPU/Common/Draw2D.h index fee2d4cef393..e1ed615ba86a 100644 --- a/GPU/Common/Draw2D.h +++ b/GPU/Common/Draw2D.h @@ -14,6 +14,7 @@ enum Draw2DShader { DRAW2D_COPY_COLOR, DRAW2D_COPY_DEPTH, DRAW2D_565_TO_DEPTH, + DRAW2D_565_TO_DEPTH_DESWIZZLE, }; inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) { @@ -22,6 +23,7 @@ inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) { return RASTER_DEPTH; case DRAW2D_COPY_COLOR: case DRAW2D_565_TO_DEPTH: + case DRAW2D_565_TO_DEPTH_DESWIZZLE: default: return RASTER_COLOR; } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 3e524da19fd1..c1052b785d01 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -560,11 +560,14 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra dest->last_frame_depth_updated = gpuStats.numFlips; } else if (source.channel == RASTER_COLOR) { VirtualFramebuffer *src = source.vfb; - // Copying color to depth. if (src->drawnFormat != GE_FORMAT_565) { WARN_LOG_ONCE(not565, G3D, "Drawn format of buffer at %08x not 565 as expected", src->fb_address); } - BlitUsingRaster(src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, dest->fbo, 0.0f, 0.0f, dest->renderWidth, dest->renderHeight, false, DRAW2D_565_TO_DEPTH, "565_to_depth"); + // Copying color to depth. + BlitUsingRaster( + src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, + dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, + false, DRAW2D_565_TO_DEPTH_DESWIZZLE, "565_to_depth"); } } @@ -2343,6 +2346,7 @@ void FramebufferManagerCommon::DeviceLost() { DoRelease(draw2DPipelineColor_); DoRelease(draw2DPipelineDepth_); DoRelease(draw2DPipeline565ToDepth_); + DoRelease(draw2DPipeline565ToDepthDeswizzle_); draw_ = nullptr; } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 447554f376f8..ce5a31a87a99 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -511,6 +511,7 @@ class FramebufferManagerCommon { Draw::Pipeline *draw2DPipelineColor_ = nullptr; Draw::Pipeline *draw2DPipelineDepth_ = nullptr; Draw::Pipeline *draw2DPipeline565ToDepth_ = nullptr; + Draw::Pipeline *draw2DPipeline565ToDepthDeswizzle_ = nullptr; Draw::SamplerState *draw2DSamplerLinear_ = nullptr; Draw::SamplerState *draw2DSamplerNearest_ = nullptr; Draw::ShaderModule *draw2DVs_ = nullptr; From f3496d34c859d2d41189df3ac708f6042cad1447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 19 Aug 2022 00:02:22 +0200 Subject: [PATCH 04/13] Texture bind channel fix. Still need to figure out a way to decide to swizzle or not. --- GPU/Common/TextureCacheCommon.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 64a48537b638..1c542633ca01 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1838,9 +1838,15 @@ bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { case GE_FORMAT_565: case GE_FORMAT_5551: case GE_FORMAT_DEPTH16: - return texFormat == GE_TFMT_CLUT16; + if (texFormat == GE_TFMT_CLUT16) { + return true; + } + break; case GE_FORMAT_8888: - return texFormat == GE_TFMT_CLUT32; + if (texFormat == GE_TFMT_CLUT32) { + return true; + } + break; } WARN_LOG(G3D, "Invalid CLUT/framebuffer combination: %s vs %s", GeTextureFormatToString(texFormat), GeBufferFormatToString(bufferFormat)); return false; From 89c96142a17d9ad7e5ad433bbeeb6516e411d6c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 19 Aug 2022 00:34:02 +0200 Subject: [PATCH 05/13] Really not sure how we can detect whether a depth deswizzle is needed :( Hopefully temporary flag... --- Core/Compatibility.cpp | 1 + Core/Compatibility.h | 1 + Core/CoreParameter.h | 2 +- GPU/Common/FramebufferManagerCommon.cpp | 12 ++++++++++-- assets/compat.ini | 10 ++++++++++ 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index c56cf702f8ce..f17c1fd6a1f4 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -97,6 +97,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "ZZT3SelectHack", &flags_.ZZT3SelectHack); CheckSetting(iniFile, gameID, "AllowLargeFBTextureOffsets", &flags_.AllowLargeFBTextureOffsets); CheckSetting(iniFile, gameID, "AtracLoopHack", &flags_.AtracLoopHack); + CheckSetting(iniFile, gameID, "DeswizzleDepth", &flags_.DeswizzleDepth); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index c293fdfa17af..29345aee42ca 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -87,6 +87,7 @@ struct CompatFlags { bool ZZT3SelectHack; bool AllowLargeFBTextureOffsets; bool AtracLoopHack; + bool DeswizzleDepth; }; class IniFile; diff --git a/Core/CoreParameter.h b/Core/CoreParameter.h index 7e9ea60b2e46..9ef01c4d9bac 100644 --- a/Core/CoreParameter.h +++ b/Core/CoreParameter.h @@ -66,7 +66,7 @@ struct CoreParameter { bool headLess; // Try to avoid messageboxes etc // Internal PSP rendering resolution and scale factor. - int renderScaleFactor; + int renderScaleFactor = 1; int renderWidth; int renderHeight; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index c1052b785d01..1bfdafb5eb41 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -551,7 +551,7 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra // For now, let's just do the last thing, if there are multiple. // for (auto &source : sources) { - if (sources.size()) { + if (!sources.empty()) { auto &source = sources.back(); if (source.channel == RASTER_DEPTH) { // Good old depth->depth copy. @@ -563,11 +563,19 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra if (src->drawnFormat != GE_FORMAT_565) { WARN_LOG_ONCE(not565, G3D, "Drawn format of buffer at %08x not 565 as expected", src->fb_address); } + + // Really hate to do this, but tracking the depth swizzle state across multiple + // copies is not easy. + Draw2DShader shader = DRAW2D_565_TO_DEPTH; + if (PSP_CoreParameter().compat.flags().DeswizzleDepth) { + shader = DRAW2D_565_TO_DEPTH_DESWIZZLE; + } + // Copying color to depth. BlitUsingRaster( src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, - false, DRAW2D_565_TO_DEPTH_DESWIZZLE, "565_to_depth"); + false, shader, "565_to_depth"); } } diff --git a/assets/compat.ini b/assets/compat.ini index c6011605d780..0903e5244198 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1266,3 +1266,13 @@ ULES00618 = true # Silver Fall ULES00808 = true ULUS10270 = true + +[DeswizzleDepth] +UCUS98633 = true +UCAS40145 = true +UCES00420 = true +UCJS10052 = true +UCKS45048 = true +UCJS18030 = true +UCJS18047 = true +NPJG00015 = true From 412d44dc9253943b34bdeb59acc97aee9919f0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 20 Aug 2022 12:20:37 +0200 Subject: [PATCH 06/13] Fix glitch when changing render resolution --- GPU/Common/FramebufferManagerCommon.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 1bfdafb5eb41..8bc5c9af0377 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1906,6 +1906,13 @@ void FramebufferManagerCommon::Resized() { // Might have a new post shader - let's compile it. presentation_->UpdatePostShader(); + // Reset all shaders that might have resolution compiled-in. + if (draw2DPipeline565ToDepthDeswizzle_) { + draw2DPipeline565ToDepthDeswizzle_->Release(); + draw2DPipeline565ToDepthDeswizzle_ = nullptr; + } + + #ifdef _WIN32 // Seems related - if you're ok with numbers all the time, show some more :) if (g_Config.iShowFPSCounter != 0) { From e2707d3ab393eaa9f084884b767d7919efeab224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 20 Aug 2022 16:31:53 +0200 Subject: [PATCH 07/13] Remove debug code, cleanup --- GPU/Common/TextureCacheCommon.cpp | 6 ------ assets/compat.ini | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 1c542633ca01..bca4d6a9f473 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -525,10 +525,6 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { def.format = format; def.bufw = bufw; - if (texaddr == 0x04710000) { - texaddr = texaddr; - } - std::vector candidates = GetFramebufferCandidates(def, 0); if (candidates.size() > 0) { int index = GetBestCandidateIndex(candidates); @@ -978,8 +974,6 @@ bool TextureCacheCommon::MatchFramebuffer( if (matchingClutFormat) { if (!noOffset) { WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s with offset at %08x +%dx%d", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address, matchInfo->xOffset, matchInfo->yOffset); - } else { - WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer (%s) using %s at %08x", channel == RASTER_DEPTH ? "DEPTH" : "COLOR", GeTextureFormatToString(entry.format), fb_address); } return true; } else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) { diff --git a/assets/compat.ini b/assets/compat.ini index 0903e5244198..974a6216defd 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1102,6 +1102,10 @@ NPEH00029 = true ULUS10455 = true [BlueToAlpha] +# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, +# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering +# to avoid problems. + # Split/Second ULES01402 = true ULUS10513 = true @@ -1121,10 +1125,6 @@ ULES00262 = true ULUS10064 = true ULKS46087 = true -# Some games render first to RGB of a 4444 texture, then they switch to 565 and render masked to blue, -# just to be able to render to the alpha channel of the 4444. We can detect that and reroute rendering -# to avoid problems. - [DateLimited] # Car Jack Streets - issue #12698 NPUZ00043 = true From ca24f1b9aab0d3d775f2382761a22381c47cf790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 20 Aug 2022 16:32:04 +0200 Subject: [PATCH 08/13] Fixes for D3D11 --- Common/GPU/ShaderWriter.cpp | 16 ++++++++++++++++ Common/GPU/ShaderWriter.h | 3 ++- GPU/Common/Draw2D.cpp | 7 ++++++- GPU/Common/FramebufferManagerCommon.cpp | 2 ++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Common/GPU/ShaderWriter.cpp b/Common/GPU/ShaderWriter.cpp index 900e2d116014..0332e4d3460e 100644 --- a/Common/GPU/ShaderWriter.cpp +++ b/Common/GPU/ShaderWriter.cpp @@ -423,3 +423,19 @@ ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv } return *this; } + +ShaderWriter &ShaderWriter::GetTextureSize(const char *szVariable, const char *texName) { + switch (lang_.shaderLanguage) { + case HLSL_D3D11: + F(" float2 %s; %s.GetDimensions(%s.x, %s.y);", szVariable, texName, szVariable, szVariable); + break; + case HLSL_D3D9: + F(" float2 %s; %s.GetDimensions(%s.x, %s.y);", szVariable, texName, szVariable, szVariable); + break; + default: + // Note: we ignore the sampler. make sure you bound samplers to the textures correctly. + F("vec2 %s = textureSize(%s, 0);", szVariable, texName); + break; + } + return *this; +} diff --git a/Common/GPU/ShaderWriter.h b/Common/GPU/ShaderWriter.h index f07d4159f568..1a1896c75bf6 100644 --- a/Common/GPU/ShaderWriter.h +++ b/Common/GPU/ShaderWriter.h @@ -83,7 +83,8 @@ class ShaderWriter { void ConstFloat(const char *name, float value); - ShaderWriter &SampleTexture2D(const char *sampName, const char *uv); + ShaderWriter &SampleTexture2D(const char *texName, const char *uv); + ShaderWriter &GetTextureSize(const char *szVariable, const char *texName); // Simple shaders with no special tricks. void BeginVSMain(Slice inputs, Slice uniforms, Slice varyings); diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 89292adfaf83..7a6d0e60f242 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -70,6 +70,7 @@ RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n"); writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset); writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); + return RASTER_DEPTH; } @@ -83,7 +84,7 @@ RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { // Unlike when just copying a depth buffer, here we're generating new depth values so we'll // have to apply the scaling. DepthScaleFactors factors = GetDepthScaleFactors(); - writer.C(" vec2 tsize = vec2(textureSize(tex, 0));\n"); + writer.GetTextureSize("tsize", "tex").C("\n"); writer.C(" vec2 coord = v_texcoord * tsize;\n"); writer.F(" float strip = 4.0 * %f;\n", g_scale); writer.C(" float in_strip = mod(coord.y, strip);\n"); @@ -255,4 +256,8 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, linearFilter ? &draw2DSamplerLinear_ : &draw2DSamplerNearest_); draw_->DrawUP(verts, vertexCount); + + draw_->InvalidateCachedState(); + + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_VERTEXSHADER_STATE); } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 8bc5c9af0377..8e82cc83f130 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -552,6 +552,8 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra // for (auto &source : sources) { if (!sources.empty()) { + draw_->InvalidateCachedState(); + auto &source = sources.back(); if (source.channel == RASTER_DEPTH) { // Good old depth->depth copy. From 9cc8cfaa082c113f4af18d48e0394d7b63fc7756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 20 Aug 2022 16:57:02 +0200 Subject: [PATCH 09/13] Use a DX9-compatible method to pass texwidth and height. the effect doesn't work on DX9 yet though. --- GPU/Common/Draw2D.cpp | 27 +++++++++++++++++++++---- GPU/Common/FramebufferManagerCommon.cpp | 2 +- GPU/Common/FramebufferManagerCommon.h | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 7a6d0e60f242..9c045ecfe0aa 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -40,6 +40,20 @@ static const SamplerDef samplers[1] = { { "tex" }, }; +static const UniformDef uniforms[1] = { + { "vec2", "texSize", 0 }, +}; + +struct Draw2DUB { + float texSizeX; + float texSizeY; +}; + +const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), { + { "texSize", -1, 0, UniformType::FLOAT2, 0 }, +} }; + + RasterChannel GenerateDraw2DFs(ShaderWriter &writer) { writer.DeclareSamplers(samplers); writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_NONE); @@ -79,12 +93,12 @@ static float g_scale; RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { writer.DeclareSamplers(samplers); - writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_WRITEDEPTH); + writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH); writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); // Unlike when just copying a depth buffer, here we're generating new depth values so we'll // have to apply the scaling. DepthScaleFactors factors = GetDepthScaleFactors(); - writer.GetTextureSize("tsize", "tex").C("\n"); + writer.C(" vec2 tsize = texSize;\n"); writer.C(" vec2 coord = v_texcoord * tsize;\n"); writer.F(" float strip = 4.0 * %f;\n", g_scale); writer.C(" float in_strip = mod(coord.y, strip);\n"); @@ -183,7 +197,7 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener { draw2DVs_, fs }, inputLayout, depthStencil, - blend, rasterNoCull, nullptr, + blend, rasterNoCull, &draw2DUBDesc, }; Draw::Pipeline *pipeline = draw_->CreateGraphicsPipeline(pipelineDesc); @@ -198,7 +212,7 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener return pipeline; } -void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader) { +void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader, float texW, float texH) { using namespace Draw; Ensure2DResources(); @@ -251,6 +265,11 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver break; } + Draw2DUB ub; + ub.texSizeX = tex ? tex->Width() : texW; + ub.texSizeY = tex ? tex->Height() : texH; + draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub)); + if (tex) { draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex); } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 8e82cc83f130..aec33f43a834 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -2566,7 +2566,7 @@ void FramebufferManagerCommon::BlitUsingRaster( Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f }; draw_->SetViewports(1, &vp); draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height()); - DrawStrip2D(nullptr, vtx, 4, linearFilter, shader); + DrawStrip2D(nullptr, vtx, 4, linearFilter, shader, src->Width(), src->Height()); gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index ce5a31a87a99..f63b319c3676 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -375,7 +375,7 @@ class FramebufferManagerCommon { Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags); - void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel); + void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel, float texW = 0.0f, float texH = 0.0f); void Ensure2DResources(); Draw::Pipeline *Create2DPipeline(RasterChannel (*generate)(ShaderWriter &)); From 668de1f5448277ed5eb228f7290ab4fac5b64199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 20 Aug 2022 16:58:59 +0200 Subject: [PATCH 10/13] Compatibility check --- GPU/Common/FramebufferManagerCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index aec33f43a834..db493407ff89 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -560,7 +560,7 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra BlitFramebufferDepth(source.vfb, dest); gpuStats.numDepthCopies++; dest->last_frame_depth_updated = gpuStats.numFlips; - } else if (source.channel == RASTER_COLOR) { + } else if (source.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) { VirtualFramebuffer *src = source.vfb; if (src->drawnFormat != GE_FORMAT_565) { WARN_LOG_ONCE(not565, G3D, "Drawn format of buffer at %08x not 565 as expected", src->fb_address); From 80772dd18edd30da5a8ee11270fcf74c85943725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 21 Aug 2022 08:46:32 +0200 Subject: [PATCH 11/13] Remove leftover resolutionScale depal config parameter --- GPU/Common/DepalettizeCommon.cpp | 1 - GPU/Common/DepalettizeShaderCommon.h | 1 - 2 files changed, 2 deletions(-) diff --git a/GPU/Common/DepalettizeCommon.cpp b/GPU/Common/DepalettizeCommon.cpp index 4d395a2d4a85..185eecdbc4b0 100644 --- a/GPU/Common/DepalettizeCommon.cpp +++ b/GPU/Common/DepalettizeCommon.cpp @@ -173,7 +173,6 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GETexture config.mask = gstate.getClutIndexMask(); config.bufferFormat = bufferFormat; config.textureFormat = textureFormat; - config.resolutionScale = 3; GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc()); diff --git a/GPU/Common/DepalettizeShaderCommon.h b/GPU/Common/DepalettizeShaderCommon.h index c4577be92044..74bb38d19606 100644 --- a/GPU/Common/DepalettizeShaderCommon.h +++ b/GPU/Common/DepalettizeShaderCommon.h @@ -31,7 +31,6 @@ struct DepalConfig { GEPaletteFormat clutFormat; GETextureFormat textureFormat; GEBufferFormat bufferFormat; - int resolutionScale; }; void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang); From 8e7cf596b6e612e761e517c1c97e5e9774599b15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 21 Aug 2022 08:53:34 +0200 Subject: [PATCH 12/13] Address more feedback, cleanup scale parameter --- GPU/Common/Draw2D.cpp | 18 +++++++++--------- GPU/Common/FramebufferManagerCommon.cpp | 7 ------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 9c045ecfe0aa..663ff2cc905f 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -40,17 +40,20 @@ static const SamplerDef samplers[1] = { { "tex" }, }; -static const UniformDef uniforms[1] = { +static const UniformDef uniforms[2] = { { "vec2", "texSize", 0 }, + { "float", "scaleFactor", 1}, }; struct Draw2DUB { float texSizeX; float texSizeY; + float scaleFactor; }; const UniformBufferDesc draw2DUBDesc{ sizeof(Draw2DUB), { { "texSize", -1, 0, UniformType::FLOAT2, 0 }, + { "scaleFactor", -1, 1, UniformType::FLOAT1, 0 }, } }; @@ -88,9 +91,6 @@ RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { return RASTER_DEPTH; } -// ugly way to get the scale into the function -static float g_scale; - RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { writer.DeclareSamplers(samplers); writer.BeginFSMain(uniforms, varyings, FSFLAG_WRITEDEPTH); @@ -100,7 +100,7 @@ RasterChannel GenerateDraw2D565ToDepthDeswizzleFs(ShaderWriter &writer) { DepthScaleFactors factors = GetDepthScaleFactors(); writer.C(" vec2 tsize = texSize;\n"); writer.C(" vec2 coord = v_texcoord * tsize;\n"); - writer.F(" float strip = 4.0 * %f;\n", g_scale); + writer.F(" float strip = 4.0 * scaleFactor;\n"); writer.C(" float in_strip = mod(coord.y, strip);\n"); writer.C(" coord.y = coord.y - in_strip + strip - in_strip;\n"); writer.C(" coord /= tsize;\n"); @@ -234,8 +234,8 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } if (!draw2DPipelineDepth_) { draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DDepthFs); - linearFilter = false; } + linearFilter = false; draw_->BindPipeline(draw2DPipelineDepth_); break; @@ -246,8 +246,8 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } if (!draw2DPipeline565ToDepth_) { draw2DPipeline565ToDepth_ = Create2DPipeline(&GenerateDraw2D565ToDepthFs); - linearFilter = false; } + linearFilter = false; draw_->BindPipeline(draw2DPipeline565ToDepth_); break; @@ -257,10 +257,9 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver return; } if (!draw2DPipeline565ToDepthDeswizzle_) { - g_scale = renderScaleFactor_; draw2DPipeline565ToDepthDeswizzle_ = Create2DPipeline(&GenerateDraw2D565ToDepthDeswizzleFs); - linearFilter = false; } + linearFilter = false; draw_->BindPipeline(draw2DPipeline565ToDepthDeswizzle_); break; } @@ -268,6 +267,7 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver Draw2DUB ub; ub.texSizeX = tex ? tex->Width() : texW; ub.texSizeY = tex ? tex->Height() : texH; + ub.scaleFactor = (float)renderScaleFactor_; draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub)); if (tex) { diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index db493407ff89..bfdc23815f15 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1908,13 +1908,6 @@ void FramebufferManagerCommon::Resized() { // Might have a new post shader - let's compile it. presentation_->UpdatePostShader(); - // Reset all shaders that might have resolution compiled-in. - if (draw2DPipeline565ToDepthDeswizzle_) { - draw2DPipeline565ToDepthDeswizzle_->Release(); - draw2DPipeline565ToDepthDeswizzle_ = nullptr; - } - - #ifdef _WIN32 // Seems related - if you're ok with numbers all the time, show some more :) if (g_Config.iShowFPSCounter != 0) { From a11e7e167bfed310f2d9af25620a4293f8c4b1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 21 Aug 2022 09:57:19 +0200 Subject: [PATCH 13/13] More D3D9 work on depth textures. Something still missing. --- GPU/Common/DepalettizeShaderCommon.cpp | 16 +++++++++++++++- GPU/Common/TextureCacheCommon.cpp | 1 - GPU/Directx9/GPU_DX9.cpp | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 11c3c5039447..af2bf13437eb 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -152,6 +152,8 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c writer.ConstFloat("z_offset", factors.offset); } + writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); + float index_multiplier = 1.0f; // pixelformat is the format of the texture we are sampling. bool formatOK = true; @@ -233,6 +235,19 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c case GE_FORMAT_DEPTH16: { // TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway. + // Not on D3D9 though, so this path is still relevant. + + if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) { + // Convert depth to 565, without going through a CLUT. + writer.C(" float depth = (index.x - z_offset) * z_scale;\n"); + writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n"); + writer.C(" float r = mod(idepth, 32.0) / 31.0f;\n"); + writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0f;\n"); + writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0f;\n"); + writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n"); + return; + } + if (shift < 16) { index_multiplier = 1.0f / (float)(1 << shift); truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)"); @@ -269,7 +284,6 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, c char offset[128] = ""; sprintf(offset, " + %f", texel_offset); - writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n"); writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset); writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n"); } diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index bca4d6a9f473..e4a8c9cd330f 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1863,7 +1863,6 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer // TODO: Implement shader depal in the fragment shader generator for D3D11 at least. if (!draw_->GetDeviceCaps().fragmentShaderInt32Supported) { useShaderDepal = false; - depth = false; // Can't support this } switch (draw_->GetShaderLanguageDesc().shaderLanguage) { diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index c3f4476fcb1f..b36cb3428887 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -160,6 +160,7 @@ void GPU_DX9::CheckGPUFeatures() { u32 features = 0; features |= GPU_SUPPORTS_16BIT_FORMATS; features |= GPU_SUPPORTS_BLEND_MINMAX; + features |= GPU_SUPPORTS_DEPTH_TEXTURE; features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; // Accurate depth is required because the Direct3D API does not support inverse Z.