diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h index 89f0acf979cb..db36292c4079 100644 --- a/Common/Math/CrossSIMD.h +++ b/Common/Math/CrossSIMD.h @@ -172,7 +172,7 @@ struct Vec4U16 { Vec4U16 operator ^(Vec4U16 other) const { return Vec4U16{ _mm_xor_si128(v, other.v) }; } Vec4U16 Max(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; } - Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; } + Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_min_epu16_SSE2(v, other.v) }; } Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; } }; diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp index 4f000c884b51..33bdd987f905 100644 --- a/GPU/Common/DepthRaster.cpp +++ b/GPU/Common/DepthRaster.cpp @@ -199,7 +199,7 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y switch (compareMode) { case ZCompareMode::Greater: // To implement the greater/greater-than comparison, we can combine mask and max. - // It might be better to do the math in float space on x86 due to SSE2 deficiencies. + // Unfortunately there's no unsigned max on SSE2, it's synthesized by xoring 0x8000 on input and output. // We use AndNot to zero out Z results, before doing Max with the buffer. AndNot(shortZ, shortMaskInv).Max(bufferValues).Store(rowPtr + x); break; diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index acc41fcb2202..a1238172bd2c 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -335,6 +335,9 @@ void DrawEngineD3D11::Flush() { context_->Draw(vertexCount, 0); } } + if (useDepthRaster_) { + DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount); + } } else { PROFILE_THIS_SCOPE("soft"); VertexDecoder *swDec = dec_; @@ -388,6 +391,13 @@ void DrawEngineD3D11::Flush() { UpdateCachedViewportState(vpAndScissor); } + // At this point, rect and line primitives are still preserved as such. 
So, it's the best time to do software depth raster. + // We could piggyback on the viewport transform below, but it gets complicated since it's different per-backend. Which we really + // should clean up one day... + if (useDepthRaster_) { + DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount); + } + SoftwareTransform swTransform(params); const Lin::Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f); diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index a0cd0c5733f8..e6d7d3ab514b 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -292,6 +292,9 @@ void DrawEngineDX9::Flush() { } } } + if (useDepthRaster_) { + DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount); + } } else { VertexDecoder *swDec = dec_; if (swDec->nweights != 0) { @@ -344,6 +347,13 @@ void DrawEngineDX9::Flush() { UpdateCachedViewportState(vpAndScissor); } + // At this point, rect and line primitives are still preserved as such. So, it's the best time to do software depth raster. + // We could piggyback on the viewport transform below, but it gets complicated since it's different per-backend. Which we really + // should clean up one day... 
+ if (useDepthRaster_) { + DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount); + } + int maxIndex = numDecodedVerts_; SoftwareTransform swTransform(params); diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 93f8408f50b2..42fed50d2ea3 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -315,6 +315,9 @@ void DrawEngineGLES::Flush() { inputLayout, vertexBuffer, vertexBufferOffset, glprim[prim], 0, vertexCount); } + if (useDepthRaster_) { + DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount); + } } else { PROFILE_THIS_SCOPE("soft"); VertexDecoder *swDec = dec_; @@ -371,6 +374,13 @@ void DrawEngineGLES::Flush() { } } + // At this point, rect and line primitives are still preserved as such. So, it's the best time to do software depth raster. + // We could piggyback on the viewport transform below, but it gets complicated since it's different per-backend. Which we really + // should clean up one day... + if (useDepthRaster_) { + DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount); + } + SoftwareTransform swTransform(params); const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset); diff --git a/UWP/lua/lua.cpp b/UWP/lua/lua.cpp deleted file mode 100644 index b0208c09c457..000000000000 --- a/UWP/lua/lua.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#include "pch.h" -#include "lua.h" diff --git a/UWP/lua/lua.h b/UWP/lua/lua.h deleted file mode 100644 index 73b4b8665005..000000000000 --- a/UWP/lua/lua.h +++ /dev/null @@ -1 +0,0 @@ -#pragma once diff --git a/UWP/lua/pch.cpp b/UWP/lua/pch.cpp deleted file mode 100644 index bcb5590be1b3..000000000000 --- a/UWP/lua/pch.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "pch.h" diff --git a/UWP/lua/pch.h b/UWP/lua/pch.h deleted file mode 100644 index 529bbb17fec3..000000000000 --- a/UWP/lua/pch.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include "targetver.h" - -#ifndef WIN32_LEAN_AND_MEAN -#define 
WIN32_LEAN_AND_MEAN -#endif - -#include <windows.h> diff --git a/UWP/lua/targetver.h b/UWP/lua/targetver.h deleted file mode 100644 index a66ecb00f153..000000000000 --- a/UWP/lua/targetver.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -// Including SDKDDKVer.h defines the highest available Windows platform. - -// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and -// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. - -#include <SDKDDKVer.h>