Skip to content

Commit 538a64a

Browse files
ns6089 authored and ClassicOldSong committed
fix(win/video): don't offload chroma subsampling math to texture sampler when downscaling (LizardByte#3014)
* Don't use sampler math for chroma if downscaling
* Correct portrait rotation offsets
1 parent 4353599 commit 538a64a

13 files changed

+177 -93 lines changed

src/platform/windows/display_vram.cpp

Lines changed: 112 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,10 @@ namespace platf::dxgi {
107107
blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl;
108108
blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl;
109109
blob_t convert_yuv420_packed_uv_type0_vs_hlsl;
110+
blob_t convert_yuv420_packed_uv_type0s_ps_hlsl;
111+
blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl;
112+
blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl;
113+
blob_t convert_yuv420_packed_uv_type0s_vs_hlsl;
110114
blob_t convert_yuv420_planar_y_ps_hlsl;
111115
blob_t convert_yuv420_planar_y_ps_linear_hlsl;
112116
blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
@@ -488,6 +492,110 @@ namespace platf::dxgi {
488492
frame_texture->AddRef();
489493
output_texture.reset(frame_texture);
490494

495+
HRESULT status = S_OK;
496+
497+
#define create_vertex_shader_helper(x, y) \
498+
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
499+
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
500+
return -1; \
501+
}
502+
#define create_pixel_shader_helper(x, y) \
503+
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
504+
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
505+
return -1; \
506+
}
507+
508+
const bool downscaling = display->width > width || display->height > height;
509+
510+
switch (format) {
511+
case DXGI_FORMAT_NV12:
512+
// Semi-planar 8-bit YUV 4:2:0
513+
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
514+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
515+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
516+
if (downscaling) {
517+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
518+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
519+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
520+
}
521+
else {
522+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
523+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
524+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
525+
}
526+
break;
527+
528+
case DXGI_FORMAT_P010:
529+
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
530+
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
531+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
532+
if (display->is_hdr()) {
533+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
534+
}
535+
else {
536+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
537+
}
538+
if (downscaling) {
539+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
540+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
541+
if (display->is_hdr()) {
542+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
543+
}
544+
else {
545+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
546+
}
547+
}
548+
else {
549+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
550+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
551+
if (display->is_hdr()) {
552+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
553+
}
554+
else {
555+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
556+
}
557+
}
558+
break;
559+
560+
case DXGI_FORMAT_R16_UINT:
561+
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
562+
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
563+
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
564+
if (display->is_hdr()) {
565+
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
566+
}
567+
else {
568+
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
569+
}
570+
break;
571+
572+
case DXGI_FORMAT_AYUV:
573+
// Packed 8-bit YUV 4:4:4
574+
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
575+
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
576+
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
577+
break;
578+
579+
case DXGI_FORMAT_Y410:
580+
// Packed 10-bit YUV 4:4:4
581+
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
582+
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
583+
if (display->is_hdr()) {
584+
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
585+
}
586+
else {
587+
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
588+
}
589+
break;
590+
591+
default:
592+
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
593+
return -1;
594+
}
595+
596+
#undef create_vertex_shader_helper
597+
#undef create_pixel_shader_helper
598+
491599
auto out_width = width;
492600
auto out_height = height;
493601

@@ -676,83 +784,6 @@ namespace platf::dxgi {
676784
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
677785
}
678786

679-
#define create_vertex_shader_helper(x, y) \
680-
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
681-
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
682-
return -1; \
683-
}
684-
#define create_pixel_shader_helper(x, y) \
685-
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
686-
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
687-
return -1; \
688-
}
689-
690-
switch (format) {
691-
case DXGI_FORMAT_NV12:
692-
// Semi-planar 8-bit YUV 4:2:0
693-
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
694-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
695-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
696-
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
697-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
698-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
699-
break;
700-
701-
case DXGI_FORMAT_P010:
702-
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
703-
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
704-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
705-
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
706-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
707-
if (display->is_hdr()) {
708-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
709-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
710-
}
711-
else {
712-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
713-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
714-
}
715-
break;
716-
717-
case DXGI_FORMAT_R16_UINT:
718-
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
719-
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
720-
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
721-
if (display->is_hdr()) {
722-
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
723-
}
724-
else {
725-
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
726-
}
727-
break;
728-
729-
case DXGI_FORMAT_AYUV:
730-
// Packed 8-bit YUV 4:4:4
731-
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
732-
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
733-
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
734-
break;
735-
736-
case DXGI_FORMAT_Y410:
737-
// Packed 10-bit YUV 4:4:4
738-
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
739-
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
740-
if (display->is_hdr()) {
741-
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
742-
}
743-
else {
744-
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
745-
}
746-
break;
747-
748-
default:
749-
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
750-
return -1;
751-
}
752-
753-
#undef create_vertex_shader_helper
754-
#undef create_pixel_shader_helper
755-
756787
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
757788
if (!default_color_vectors) {
758789
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
@@ -1923,6 +1954,10 @@ namespace platf::dxgi {
19231954
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear);
19241955
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer);
19251956
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs);
1957+
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps);
1958+
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear);
1959+
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer);
1960+
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs);
19261961
compile_pixel_shader_helper(convert_yuv420_planar_y_ps);
19271962
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
19281963
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);

src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0_vs.hlsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,5 +11,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {
1111

1212
vertex_t main_vs(uint vertex_id : SV_VertexID)
1313
{
14-
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset.x, rotate_texture_steps);
14+
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
1515
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#include "include/convert_base.hlsl"
2+
3+
#define LEFT_SUBSAMPLING_SCALE
4+
5+
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#include "include/convert_linear_base.hlsl"
2+
3+
#define LEFT_SUBSAMPLING_SCALE
4+
5+
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#include "include/convert_perceptual_quantizer_base.hlsl"
2+
3+
#define LEFT_SUBSAMPLING_SCALE
4+
5+
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
cbuffer subsample_offset_cbuffer : register(b0) {
2+
float2 subsample_offset;
3+
};
4+
5+
cbuffer rotate_texture_steps_cbuffer : register(b1) {
6+
int rotate_texture_steps;
7+
};
8+
9+
#define LEFT_SUBSAMPLING_SCALE
10+
#include "include/base_vs.hlsl"
11+
12+
vertex_t main_vs(uint vertex_id : SV_VertexID)
13+
{
14+
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset, rotate_texture_steps);
15+
}

src_assets/windows/assets/shaders/directx/convert_yuv420_planar_y_vs.hlsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {
66

77
vertex_t main_vs(uint vertex_id : SV_VertexID)
88
{
9-
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
9+
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
1010
}

src_assets/windows/assets/shaders/directx/convert_yuv444_packed_vs.hlsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b1) {
66

77
vertex_t main_vs(uint vertex_id : SV_VertexID)
88
{
9-
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
9+
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
1010
}

src_assets/windows/assets/shaders/directx/convert_yuv444_planar_vs.hlsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ cbuffer color_matrix_cbuffer : register(b3) {
1515

1616
vertex_t main_vs(uint vertex_id : SV_VertexID)
1717
{
18-
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps);
18+
vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, float2(0, 0), rotate_texture_steps);
1919

2020
output.viewport = vertex_id / 3;
2121

src_assets/windows/assets/shaders/directx/cursor_vs.hlsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,5 +6,5 @@ cbuffer rotate_texture_steps_cbuffer : register(b2) {
66

77
vertex_t main_vs(uint vertex_id : SV_VertexID)
88
{
9-
return generate_fullscreen_triangle_vertex(vertex_id, rotate_texture_steps);
9+
return generate_fullscreen_triangle_vertex(vertex_id, float2(0, 0), rotate_texture_steps);
1010
}

0 commit comments

Comments
 (0)