From f6aa86dfee38bfaddb18b6605fb6339473425688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Jul 2024 12:27:41 +0200 Subject: [PATCH 1/5] Add a comment --- GPU/Vulkan/DrawEngineVulkan.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index c8d0250888cf..befe01da8977 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -391,6 +391,8 @@ void DrawEngineVulkan::DoFlush() { gpuStats.numUncachedVertsDrawn += vertexCount; prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim); + // At this point, the output is always an indexed triangle/line/point list, no strips/fans. + u16 *inds = decIndex_; SoftwareTransformResult result{}; SoftwareTransformParams params{}; @@ -403,10 +405,11 @@ void DrawEngineVulkan::DoFlush() { // do not respect scissor rects. params.allowClear = framebufferManager_->UseBufferedRendering(); params.allowSeparateAlphaClear = false; - params.provokeFlatFirst = true; if (renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) { // We can get the OpenGL behavior, no need for workarounds. 
params.provokeFlatFirst = false; + } else { + params.provokeFlatFirst = true; } params.flippedY = true; params.usesHalfZ = true; From 665f03ff6251c5d5ffebd5068daea9435f77cf2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Jul 2024 12:37:20 +0200 Subject: [PATCH 2/5] Add provoking vertex to caps, flip the flag around --- Common/GPU/D3D11/thin3d_d3d11.cpp | 2 ++ Common/GPU/D3D9/thin3d_d3d9.cpp | 2 ++ Common/GPU/OpenGL/thin3d_gl.cpp | 3 +++ Common/GPU/Vulkan/thin3d_vulkan.cpp | 3 +++ Common/GPU/thin3d.h | 2 +- GPU/Common/SoftwareTransformCommon.cpp | 2 +- GPU/Common/SoftwareTransformCommon.h | 2 +- GPU/D3D11/DrawEngineD3D11.cpp | 2 +- GPU/Directx9/DrawEngineDX9.cpp | 2 +- GPU/GLES/DrawEngineGLES.cpp | 2 +- GPU/Vulkan/DrawEngineVulkan.cpp | 4 ++-- 11 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 380daffb51f0..6ac9b4e593b4 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -281,6 +281,8 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de caps_.blendMinMaxSupported = true; caps_.multiSampleLevelsMask = 1; // More could be supported with some work. + caps_.provokingVertexLast = false; // D3D has it first, unfortunately. (and no way to change it). + caps_.presentInstantModeChange = true; caps_.presentMaxInterval = 4; caps_.presentModesSupported = PresentMode::FIFO | PresentMode::IMMEDIATE; diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index 0e12092ee036..97f0919e163b 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -786,6 +786,8 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID caps_.presentMaxInterval = 1; caps_.presentModesSupported = PresentMode::FIFO; + caps_.provokingVertexLast = false; // D3D has it first, unfortunately (and no way to change it). 
+ if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) { caps_.anisoSupported = true; } diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index bd615d6b648b..e8ff1d4d9cee 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -598,6 +598,9 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT // GLES has no support for logic framebuffer operations. There doesn't even seem to exist any such extensions. caps_.logicOpSupported = !gl_extensions.IsGLES; + // Always the case in GL (which is what we want for PSP flat shade). + caps_.provokingVertexLast = true; + // Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader): // This will induce a performance penalty on many architectures though so a blanket enable of this // is probably not a good idea. diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index d03ab9eaff80..9c9eec436760 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -913,6 +913,9 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread) caps_.sampleRateShadingSupported = vulkan->GetDeviceFeatures().enabled.standard.sampleRateShading != 0; caps_.textureSwizzleSupported = true; + // Note that it must also be enabled on the pipelines (which we do). 
+ caps_.provokingVertexLast = vulkan->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast; + // Present mode stuff caps_.presentMaxInterval = 1; caps_.presentInstantModeChange = false; // TODO: Fix this with some work in VulkanContext diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index dc7ba23b24d4..b1a9f9448702 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -612,7 +612,7 @@ struct DeviceCaps { bool setMaxFrameLatencySupported; bool textureSwizzleSupported; bool requiresHalfPixelOffset; - + bool provokingVertexLast; // GL behavior, what the PSP does bool verySlowShaderCompiler; // From the other backends, we can detect if D3D9 support is known bad (like on Xe) and disable it. diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index fd117de69a93..0a062898acaf 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -203,7 +203,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de } int provokeIndOffset = 0; - if (params_.provokeFlatFirst) { + if (!params_.provokingVertexLast) { provokeIndOffset = ColorIndexOffset(prim, gstate.getShadeMode(), gstate.isModeClear()); } diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index 78717cdfa3ae..db001d128226 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -57,9 +57,9 @@ struct SoftwareTransformParams { TextureCacheCommon *texCache; bool allowClear; bool allowSeparateAlphaClear; - bool provokeFlatFirst; bool flippedY; bool usesHalfZ; + bool provokingVertexLast; }; class SoftwareTransform { diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index f415745d7021..e1145b514ac1 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -382,7 +382,7 @@ void DrawEngineD3D11::DoFlush() { params.texCache = textureCache_; params.allowClear = true; 
params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears - params.provokeFlatFirst = true; + params.provokingVertexLast = false; params.flippedY = false; params.usesHalfZ = true; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 4d16fd6701fe..f39969d88afa 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -334,7 +334,7 @@ void DrawEngineDX9::DoFlush() { params.texCache = textureCache_; params.allowClear = true; params.allowSeparateAlphaClear = false; - params.provokeFlatFirst = true; + params.provokingVertexLast = false; params.flippedY = false; params.usesHalfZ = true; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index d6da9070de98..74be51f0a793 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -356,7 +356,7 @@ void DrawEngineGLES::DoFlush() { params.texCache = textureCache_; params.allowClear = true; // Clear in OpenGL respects scissor rects, so we'll use it. params.allowSeparateAlphaClear = true; - params.provokeFlatFirst = false; + params.provokingVertexLast = true; params.flippedY = framebufferManager_->UseBufferedRendering(); params.usesHalfZ = false; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index befe01da8977..9cb6d752ba49 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -407,9 +407,9 @@ void DrawEngineVulkan::DoFlush() { params.allowSeparateAlphaClear = false; if (renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) { // We can get the OpenGL behavior, no need for workarounds. 
- params.provokeFlatFirst = false; + params.provokingVertexLast = true; } else { - params.provokeFlatFirst = true; + params.provokingVertexLast = false; } params.flippedY = true; params.usesHalfZ = true; From 6b1e57a8402f2a5f33d3af8837807607bbb8435e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Jul 2024 13:01:25 +0200 Subject: [PATCH 3/5] We never draw non-indexed in software transform mode, so get rid of the path. --- GPU/Common/SoftwareTransformCommon.cpp | 9 +-------- GPU/Common/SoftwareTransformCommon.h | 4 +--- GPU/D3D11/DrawEngineD3D11.cpp | 20 ++++++++------------ GPU/Directx9/DrawEngineDX9.cpp | 8 ++------ GPU/GLES/DrawEngineGLES.cpp | 18 ++++++------------ GPU/Vulkan/DrawEngineVulkan.cpp | 16 +++++----------- 6 files changed, 23 insertions(+), 52 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 0a062898acaf..6e41c1476b53 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -497,13 +497,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy if (prim == GE_PRIM_RECTANGLES) { if (!ExpandRectangles(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode, &result->pixelMapped)) { - result->drawIndexed = false; result->drawNumTrans = 0; result->pixelMapped = false; return; } result->drawBuffer = transformedExpanded; - result->drawIndexed = true; // We don't know the color until here, so we have to do it now, instead of in StateMapping. // Might want to reconsider the order of things later... 
@@ -521,25 +519,20 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy } else if (prim == GE_PRIM_POINTS) { result->pixelMapped = false; if (!ExpandPoints(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode)) { - result->drawIndexed = false; result->drawNumTrans = 0; return; } result->drawBuffer = transformedExpanded; - result->drawIndexed = true; } else if (prim == GE_PRIM_LINES) { result->pixelMapped = false; if (!ExpandLines(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode)) { - result->drawIndexed = false; result->drawNumTrans = 0; return; } result->drawBuffer = transformedExpanded; - result->drawIndexed = true; } else { // We can simply draw the unexpanded buffer. numTrans = vertexCount; - result->drawIndexed = true; result->pixelMapped = false; // If we don't support custom cull in the shader, process it here. @@ -635,7 +628,7 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy gpuStats.numClears++; } - result->action = SW_DRAW_PRIMITIVES; + result->action = SW_DRAW_INDEXED; result->drawNumTrans = numTrans; } diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index db001d128226..9896b04bdae9 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -26,7 +26,7 @@ class TextureCacheCommon; enum SoftwareTransformAction { SW_NOT_READY, - SW_DRAW_PRIMITIVES, + SW_DRAW_INDEXED, SW_CLEAR, }; @@ -44,8 +44,6 @@ struct SoftwareTransformResult { TransformedVertex *drawBuffer; int drawNumTrans; - bool drawIndexed; - bool pixelMapped; }; diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index e1145b514ac1..37582772c9fb 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -424,7 +424,7 @@ void DrawEngineD3D11::DoFlush() { ApplyDrawStateLate(result.setStencil, 
result.stencilValue); - if (result.action == SW_DRAW_PRIMITIVES) { + if (result.action == SW_DRAW_INDEXED) { D3D11VertexShader *vshader; D3D11FragmentShader *fshader; shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true); @@ -452,17 +452,13 @@ void DrawEngineD3D11::DoFlush() { pushVerts_->EndPush(context_); ID3D11Buffer *buf = pushVerts_->Buf(); context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset); - if (result.drawIndexed) { - UINT iOffset; - int iSize = sizeof(uint16_t) * result.drawNumTrans; - uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize); - memcpy(iptr, inds, iSize); - pushInds_->EndPush(context_); - context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset); - context_->DrawIndexed(result.drawNumTrans, 0, 0); - } else { - context_->Draw(result.drawNumTrans, 0); - } + UINT iOffset; + int iSize = sizeof(uint16_t) * result.drawNumTrans; + uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize); + memcpy(iptr, inds, iSize); + pushInds_->EndPush(context_); + context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset); + context_->DrawIndexed(result.drawNumTrans, 0, 0); } else if (result.action == SW_CLEAR) { u32 clearColor = result.color; float clearDepth = result.depth; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index f39969d88afa..46b314dbc8ec 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -382,7 +382,7 @@ void DrawEngineDX9::DoFlush() { VSShader *vshader = shaderManager_->ApplyShader(false, false, dec_, decOptions_.expandAllWeightsToFloat, true, pipelineState_); - if (result.action == SW_DRAW_PRIMITIVES) { + if (result.action == SW_DRAW_INDEXED) { if (result.setStencil) { dxstate.stencilFunc.set(D3DCMP_ALWAYS); dxstate.stencilRef.set(result.stencilValue); @@ -393,11 +393,7 @@ void DrawEngineDX9::DoFlush() { // Might help for text drawing. 
device_->SetVertexDeclaration(transformedVertexDecl_); - if (result.drawIndexed) { - device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, numDecodedVerts_, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); - } else { - device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex)); - } + device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, numDecodedVerts_, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); } else if (result.action == SW_CLEAR) { u32 clearColor = result.color; float clearDepth = result.depth; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 74be51f0a793..e4f342af7965 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -415,18 +415,12 @@ void DrawEngineGLES::DoFlush() { goto bail; } - if (result.action == SW_DRAW_PRIMITIVES) { - if (result.drawIndexed) { - vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, numDecodedVerts_ * sizeof(TransformedVertex), 4, &vertexBuffer); - indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); - render_->DrawIndexed( - softwareInputLayout_, vertexBuffer, vertexBufferOffset, indexBuffer, indexBufferOffset, - glprim[prim], result.drawNumTrans, GL_UNSIGNED_SHORT); - } else { - vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vertexBuffer); - render_->Draw( - softwareInputLayout_, vertexBuffer, vertexBufferOffset, glprim[prim], 0, result.drawNumTrans); - } + if (result.action == SW_DRAW_INDEXED) { + vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, numDecodedVerts_ * sizeof(TransformedVertex), 4, &vertexBuffer); + indexBufferOffset = 
(uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); + render_->DrawIndexed( + softwareInputLayout_, vertexBuffer, vertexBufferOffset, indexBuffer, indexBufferOffset, + glprim[prim], result.drawNumTrans, GL_UNSIGNED_SHORT); } else if (result.action == SW_CLEAR) { u32 clearColor = result.color; float clearDepth = result.depth; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 9cb6d752ba49..bdae445a75dc 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -448,7 +448,7 @@ void DrawEngineVulkan::DoFlush() { // Only here, where we know whether to clear or to draw primitives, should we actually set the current framebuffer! Because that gives use the opportunity // to use a "pre-clear" render pass, for high efficiency on tilers. - if (result.action == SW_DRAW_PRIMITIVES) { + if (result.action == SW_DRAW_INDEXED) { if (textureNeedsApply) { gstate_c.pixelMapped = result.pixelMapped; textureCache_->ApplyTexture(); @@ -525,16 +525,10 @@ void DrawEngineVulkan::DoFlush() { PROFILE_THIS_SCOPE("renderman_q"); - if (result.drawIndexed) { - VkBuffer vbuf, ibuf; - vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, numDecodedVerts_ * sizeof(TransformedVertex), 4, &vbuf); - ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf); - renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1); - } else { - VkBuffer vbuf; - vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf); - renderManager->Draw(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans); - } + VkBuffer vbuf, ibuf; + vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, numDecodedVerts_ * sizeof(TransformedVertex), 4, &vbuf); + ibOffset = (uint32_t)pushIndex_->Push(inds, 
sizeof(short) * result.drawNumTrans, 4, &ibuf); + renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1); } else if (result.action == SW_CLEAR) { // Note: we won't get here if the clear is alpha but not color, or color but not alpha. bool clearColor = gstate.isClearModeColorMask(); From 096985f51f64af22053788ed83fbae9fc165d8dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Jul 2024 13:26:14 +0200 Subject: [PATCH 4/5] Provoking vertex (software transform): Simpler solution Simply rotate each primitive in the index buffer to simulate a different provoking vertex. Since at this point we have already generated a plain primitive index buffer, it's easy to manipulate like this. An even better solution would be to generate rotated index buffers directly during decode, although that code is super critical and does not need more complexity. We could now also enable this for hardware transform but I'm leaving that for later. 
--- GPU/Common/SoftwareTransformCommon.cpp | 99 +++++++++----------------- GPU/Common/SoftwareTransformCommon.h | 6 +- GPU/D3D11/D3D11Util.cpp | 4 +- GPU/D3D11/DrawEngineD3D11.cpp | 7 +- GPU/Directx9/DrawEngineDX9.cpp | 8 ++- GPU/GLES/DrawEngineGLES.cpp | 1 - GPU/Vulkan/DrawEngineVulkan.cpp | 12 ++-- 7 files changed, 63 insertions(+), 74 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 6e41c1476b53..8d1624000c2b 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -125,33 +125,6 @@ static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, f return true; } -static int ColorIndexOffset(int prim, GEShadeMode shadeMode, bool clearMode) { - if (shadeMode != GE_SHADE_FLAT || clearMode) { - return 0; - } - - switch (prim) { - case GE_PRIM_LINES: - case GE_PRIM_LINE_STRIP: - return 1; - - case GE_PRIM_TRIANGLES: - case GE_PRIM_TRIANGLE_STRIP: - return 2; - - case GE_PRIM_TRIANGLE_FAN: - return 1; - - case GE_PRIM_RECTANGLES: - // We already use BR color when expanding, so no need to offset. - return 0; - - default: - break; - } - return 0; -} - void SoftwareTransform::SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale) { memcpy(&projMatrix_.m, mtx, 16 * sizeof(float)); @@ -202,11 +175,6 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de fog_slope = std::signbit(fog_slope) ? 
-65535.0f : 65535.0f; } - int provokeIndOffset = 0; - if (!params_.provokingVertexLast) { - provokeIndOffset = ColorIndexOffset(prim, gstate.getShadeMode(), gstate.isModeClear()); - } - VertexReader reader(decoded, decVtxFormat, vertType); if (throughmode) { const u32 materialAmbientRGBA = gstate.getMaterialAmbientRGBA(); @@ -221,13 +189,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de vert.pos_w = 1.0f; if (hasColor) { - if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts) { - reader.Goto(index + provokeIndOffset); - vert.color0_32 = reader.ReadColor0_8888(); - reader.Goto(index); - } else { - vert.color0_32 = reader.ReadColor0_8888(); - } + vert.color0_32 = reader.ReadColor0_8888(); } else { vert.color0_32 = materialAmbientRGBA; } @@ -268,10 +230,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de if (reader.hasUV()) reader.ReadUV(ruv); - // Read all the provoking vertex values here. Vec4f unlitColor; - if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts) - reader.Goto(index + provokeIndOffset); if (reader.hasColor0()) reader.ReadColor0(unlitColor.AsArray()); else @@ -342,34 +301,14 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source - // Flat uses the vertex normal, not provoking. - if (provokeIndOffset == 0) { - source = normal.Normalized(cpu_info.bSSE4_1); - } else { - reader.Goto(index); - if (reader.hasNormal()) - reader.ReadNrm(source.AsArray()); - if (gstate.areNormalsReversed()) - source = -source; - source.Normalize(); - } + source = normal.Normalized(cpu_info.bSSE4_1); if (!reader.hasNormal()) { ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?"); } break; case GE_PROJMAP_NORMAL: // Use non-normalized normal as source! - // Flat uses the vertex normal, not provoking. 
- if (provokeIndOffset == 0) { - source = normal; - } else { - // Need to read the normal for this vertex and weight it again.. - reader.Goto(index); - if (reader.hasNormal()) - reader.ReadNrm(source.AsArray()); - if (gstate.areNormalsReversed()) - source = -source; - } + source = normal; if (!reader.hasNormal()) { ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?"); } @@ -751,6 +690,38 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &numDecodedVerts, return true; } +// In-place. So, better not be doing this on GPU memory! +void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize) { + switch (prim) { + case GE_PRIM_LINES: + // Swap every two indices. + for (int i = 0; i < indsSize - 1; i += 2) { + u16 temp = inds[i]; + inds[i] = inds[i + 1]; + inds[i + 1] = temp; + } + break; + case GE_PRIM_TRIANGLES: + // Rotate the triangle so the last becomes the first, without changing the winding order. + // This could be done with a series of pshufb. + for (int i = 0; i < indsSize - 2; i += 3) { + u16 temp = inds[i + 2]; + inds[i + 2] = inds[i + 1]; + inds[i + 1] = inds[i]; + inds[i] = temp; + } + break; + case GE_PRIM_POINTS: + // Nothing to do, + break; + case GE_PRIM_RECTANGLES: + // Nothing to do, already using the 2nd vertex. + break; + default: + _dbg_assert_msg_(false, "IndexBufferProvokingFirstToLast: Only works with plain indexed primitives, no strips or fans") + } +} + bool SoftwareTransform::ExpandLines(int vertexCount, int &numDecodedVerts, int vertsSize, u16 *&inds, int indsSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { // Before we start, do a sanity check - does the output fit? 
if ((vertexCount / 2) * 6 > indsSize) { diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index 9896b04bdae9..457fd6a253fc 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -57,9 +57,13 @@ struct SoftwareTransformParams { bool allowSeparateAlphaClear; bool flippedY; bool usesHalfZ; - bool provokingVertexLast; }; +// Rotates each primitive of an index buffer so that its last (provoking) vertex becomes the first. +// In-place. So, better not be doing this on GPU memory! +// TODO: We could do this already during index decode. +void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize); + class SoftwareTransform { public: SoftwareTransform(SoftwareTransformParams &params) : params_(params) {} diff --git a/GPU/D3D11/D3D11Util.cpp b/GPU/D3D11/D3D11Util.cpp index b136ca57bc6a..f84b188d56b2 100644 --- a/GPU/D3D11/D3D11Util.cpp +++ b/GPU/D3D11/D3D11Util.cpp @@ -36,7 +36,9 @@ std::vector CompileShaderToBytecodeD3D11(const char *code, size_t codeS if (trimmed.find("pow(f, e) will not work for negative f") != std::string::npos) { continue; } - WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data()); + if (trimmed.size() > 1) { // ignore single nulls, not sure how they appear. 
+ WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data()); + } } } else { ERROR_LOG(Log::G3D, "%s: %s\n\n%s", "errors", errors.c_str(), numberedCode.c_str()); diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 37582772c9fb..db0440c6c48b 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -382,10 +382,15 @@ void DrawEngineD3D11::DoFlush() { params.texCache = textureCache_; params.allowClear = true; params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears - params.provokingVertexLast = false; params.flippedY = false; params.usesHalfZ = true; + if (gstate.getShadeMode() == GE_SHADE_FLAT) { + // We need to rotate the index buffer to simulate a different provoking vertex. + // We do this before line expansion etc. + IndexBufferProvokingLastToFirst(prim, inds, vertexCount); + } + // We need correct viewport values in gstate_c already. if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) { ViewportAndScissor vpAndScissor; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 46b314dbc8ec..ce20f88859e3 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -334,10 +334,16 @@ void DrawEngineDX9::DoFlush() { params.texCache = textureCache_; params.allowClear = true; params.allowSeparateAlphaClear = false; - params.provokingVertexLast = false; params.flippedY = false; params.usesHalfZ = true; + if (gstate.getShadeMode() == GE_SHADE_FLAT) { + // We need to rotate the index buffer to simulate a different provoking vertex. + // We do this before line expansion etc. + int indexCount = RemainingIndices(inds); + IndexBufferProvokingLastToFirst(prim, inds, vertexCount); + } + // We need correct viewport values in gstate_c already. 
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) { ViewportAndScissor vpAndScissor; diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index e4f342af7965..e2161572d3b6 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -356,7 +356,6 @@ void DrawEngineGLES::DoFlush() { params.texCache = textureCache_; params.allowClear = true; // Clear in OpenGL respects scissor rects, so we'll use it. params.allowSeparateAlphaClear = true; - params.provokingVertexLast = true; params.flippedY = framebufferManager_->UseBufferedRendering(); params.usesHalfZ = false; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index bdae445a75dc..b03a8556a01b 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -405,11 +405,13 @@ void DrawEngineVulkan::DoFlush() { // do not respect scissor rects. params.allowClear = framebufferManager_->UseBufferedRendering(); params.allowSeparateAlphaClear = false; - if (renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) { - // We can get the OpenGL behavior, no need for workarounds. - params.provokingVertexLast = true; - } else { - params.provokingVertexLast = false; + + if (gstate.getShadeMode() == GE_SHADE_FLAT) { + if (!renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) { + // If we can't have the hardware do it, we need to rotate the index buffer to simulate a different provoking vertex. + // We do this before line expansion etc. 
+ IndexBufferProvokingLastToFirst(prim, inds, vertexCount); + } } params.flippedY = true; params.usesHalfZ = true; From 7738899434bfc36b6c9a5383c768a071f201531f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Jul 2024 16:11:23 +0200 Subject: [PATCH 5/5] Fix triangle strip vertex order to have a consistent provoking vertex --- GPU/Common/IndexGenerator.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index e3c042f4c312..482b78f569fb 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -86,13 +86,13 @@ void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) { alignas(16) static const u16 offsets_clockwise[24] = { 0, (u16)(0 + 1), (u16)(0 + 2), - 1, (u16)(1 + 2), (u16)(1 + 1), + (u16)(1 + 1), 1, (u16)(1 + 2), 2, (u16)(2 + 1), (u16)(2 + 2), - 3, (u16)(3 + 2), (u16)(3 + 1), + (u16)(3 + 1), 3, (u16)(3 + 2), 4, (u16)(4 + 1), (u16)(4 + 2), - 5, (u16)(5 + 2), (u16)(5 + 1), + (u16)(5 + 1), 5, (u16)(5 + 2), 6, (u16)(6 + 1), (u16)(6 + 2), - 7, (u16)(7 + 2), (u16)(7 + 1), + (u16)(7 + 1), 7, (u16)(7 + 2), }; alignas(16) static const uint16_t offsets_counter_clockwise[24] = {