diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h
index 89f0acf979cb..db36292c4079 100644
--- a/Common/Math/CrossSIMD.h
+++ b/Common/Math/CrossSIMD.h
@@ -172,7 +172,7 @@ struct Vec4U16 {
 	Vec4U16 operator ^(Vec4U16 other) const { return Vec4U16{ _mm_xor_si128(v, other.v) }; }
 
 	Vec4U16 Max(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; }
-	Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_max_epu16_SSE2(v, other.v) }; }
+	Vec4U16 Min(Vec4U16 other) const { return Vec4U16{ _mm_min_epu16_SSE2(v, other.v) }; }
 	Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; }
 };
 
diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp
index 4f000c884b51..33bdd987f905 100644
--- a/GPU/Common/DepthRaster.cpp
+++ b/GPU/Common/DepthRaster.cpp
@@ -199,7 +199,7 @@ TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y
 			switch (compareMode) {
 			case ZCompareMode::Greater:
 				// To implement the greater/greater-than comparison, we can combine mask and max.
-				// It might be better to do the math in float space on x86 due to SSE2 deficiencies.
+				// Unfortunately SSE2 has no unsigned 16-bit max instruction, so it's synthesized by XORing 0x8000 on input and output.
 				// We use AndNot to zero out Z results, before doing Max with the buffer.
 				AndNot(shortZ, shortMaskInv).Max(bufferValues).Store(rowPtr + x);
 				break;
diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp
index acc41fcb2202..a1238172bd2c 100644
--- a/GPU/D3D11/DrawEngineD3D11.cpp
+++ b/GPU/D3D11/DrawEngineD3D11.cpp
@@ -335,6 +335,9 @@ void DrawEngineD3D11::Flush() {
 				context_->Draw(vertexCount, 0);
 			}
 		}
+		if (useDepthRaster_) {
+			DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount);
+		}
 	} else {
 		PROFILE_THIS_SCOPE("soft");
 		VertexDecoder *swDec = dec_;
@@ -388,6 +391,13 @@ void DrawEngineD3D11::Flush() {
 			UpdateCachedViewportState(vpAndScissor);
 		}
 
+		// At this point, rect and line primitives are still preserved as such, so it's the best time to do software depth raster.
+		// We could piggyback on the viewport transform below, but that gets complicated since the transform differs per backend,
+		// which we really should clean up one day...
+		if (useDepthRaster_) {
+			DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount);
+		}
+
 		SoftwareTransform swTransform(params);
 
 		const Lin::Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp
index a0cd0c5733f8..e6d7d3ab514b 100644
--- a/GPU/Directx9/DrawEngineDX9.cpp
+++ b/GPU/Directx9/DrawEngineDX9.cpp
@@ -292,6 +292,9 @@ void DrawEngineDX9::Flush() {
 				}
 			}
 		}
+		if (useDepthRaster_) {
+			DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount);
+		}
 	} else {
 		VertexDecoder *swDec = dec_;
 		if (swDec->nweights != 0) {
@@ -344,6 +347,13 @@ void DrawEngineDX9::Flush() {
 			UpdateCachedViewportState(vpAndScissor);
 		}
 
+		// At this point, rect and line primitives are still preserved as such, so it's the best time to do software depth raster.
+		// We could piggyback on the viewport transform below, but that gets complicated since the transform differs per backend,
+		// which we really should clean up one day...
+		if (useDepthRaster_) {
+			DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount);
+		}
+
 		int maxIndex = numDecodedVerts_;
 		SoftwareTransform swTransform(params);
 
diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp
index 93f8408f50b2..42fed50d2ea3 100644
--- a/GPU/GLES/DrawEngineGLES.cpp
+++ b/GPU/GLES/DrawEngineGLES.cpp
@@ -315,6 +315,9 @@ void DrawEngineGLES::Flush() {
 				inputLayout, vertexBuffer, vertexBufferOffset,
 				glprim[prim], 0, vertexCount);
 		}
+		if (useDepthRaster_) {
+			DepthRasterTransform(prim, dec_, dec_->VertexType(), vertexCount);
+		}
 	} else {
 		PROFILE_THIS_SCOPE("soft");
 		VertexDecoder *swDec = dec_;
@@ -371,6 +374,13 @@ void DrawEngineGLES::Flush() {
 			}
 		}
 
+		// At this point, rect and line primitives are still preserved as such, so it's the best time to do software depth raster.
+		// We could piggyback on the viewport transform below, but that gets complicated since the transform differs per backend,
+		// which we really should clean up one day...
+		if (useDepthRaster_) {
+			DepthRasterPredecoded(prim, decoded_, numDecodedVerts_, swDec, vertexCount);
+		}
+
 		SoftwareTransform swTransform(params);
 
 		const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset);
diff --git a/UWP/lua/lua.cpp b/UWP/lua/lua.cpp
deleted file mode 100644
index b0208c09c457..000000000000
--- a/UWP/lua/lua.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-﻿#include "pch.h"
-#include "lua.h"
diff --git a/UWP/lua/lua.h b/UWP/lua/lua.h
deleted file mode 100644
index 73b4b8665005..000000000000
--- a/UWP/lua/lua.h
+++ /dev/null
@@ -1 +0,0 @@
-﻿#pragma once
diff --git a/UWP/lua/pch.cpp b/UWP/lua/pch.cpp
deleted file mode 100644
index bcb5590be1b3..000000000000
--- a/UWP/lua/pch.cpp
+++ /dev/null
@@ -1 +0,0 @@
-﻿#include "pch.h"
diff --git a/UWP/lua/pch.h b/UWP/lua/pch.h
deleted file mode 100644
index 529bbb17fec3..000000000000
--- a/UWP/lua/pch.h
+++ /dev/null
@@ -1,9 +0,0 @@
-﻿#pragma once
-
-#include "targetver.h"
-
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-
-#include <windows.h>
diff --git a/UWP/lua/targetver.h b/UWP/lua/targetver.h
deleted file mode 100644
index a66ecb00f153..000000000000
--- a/UWP/lua/targetver.h
+++ /dev/null
@@ -1,8 +0,0 @@
-﻿#pragma once
-
-// Including SDKDDKVer.h defines the highest available Windows platform.
-
-// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
-// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
-
-#include <SDKDDKVer.h>