Skip to content

Commit 5f86e5e

Browse files
committed
Don't use sampler math for chroma if downscaling
1 parent 69a5dd2 commit 5f86e5e

File tree

8 files changed

+167
-81
lines changed

8 files changed

+167
-81
lines changed

src/platform/windows/display_vram.cpp

Lines changed: 112 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,10 @@ namespace platf::dxgi {
107107
blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl;
108108
blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl;
109109
blob_t convert_yuv420_packed_uv_type0_vs_hlsl;
110+
blob_t convert_yuv420_packed_uv_type0s_ps_hlsl;
111+
blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl;
112+
blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl;
113+
blob_t convert_yuv420_packed_uv_type0s_vs_hlsl;
110114
blob_t convert_yuv420_planar_y_ps_hlsl;
111115
blob_t convert_yuv420_planar_y_ps_linear_hlsl;
112116
blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl;
@@ -488,6 +492,110 @@ namespace platf::dxgi {
488492
frame_texture->AddRef();
489493
output_texture.reset(frame_texture);
490494

495+
HRESULT status = S_OK;
496+
497+
#define create_vertex_shader_helper(x, y) \
498+
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
499+
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
500+
return -1; \
501+
}
502+
#define create_pixel_shader_helper(x, y) \
503+
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
504+
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
505+
return -1; \
506+
}
507+
508+
const bool downscaling = display->width != width || display->height != height;
509+
510+
switch (format) {
511+
case DXGI_FORMAT_NV12:
512+
// Semi-planar 8-bit YUV 4:2:0
513+
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
514+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
515+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
516+
if (downscaling) {
517+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
518+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
519+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
520+
}
521+
else {
522+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
523+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
524+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
525+
}
526+
break;
527+
528+
case DXGI_FORMAT_P010:
529+
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
530+
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
531+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
532+
if (display->is_hdr()) {
533+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
534+
}
535+
else {
536+
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
537+
}
538+
if (downscaling) {
539+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs);
540+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps);
541+
if (display->is_hdr()) {
542+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
543+
}
544+
else {
545+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps);
546+
}
547+
}
548+
else {
549+
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
550+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
551+
if (display->is_hdr()) {
552+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
553+
}
554+
else {
555+
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
556+
}
557+
}
558+
break;
559+
560+
case DXGI_FORMAT_R16_UINT:
561+
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
562+
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
563+
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
564+
if (display->is_hdr()) {
565+
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
566+
}
567+
else {
568+
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
569+
}
570+
break;
571+
572+
case DXGI_FORMAT_AYUV:
573+
// Packed 8-bit YUV 4:4:4
574+
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
575+
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
576+
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
577+
break;
578+
579+
case DXGI_FORMAT_Y410:
580+
// Packed 10-bit YUV 4:4:4
581+
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
582+
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
583+
if (display->is_hdr()) {
584+
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
585+
}
586+
else {
587+
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
588+
}
589+
break;
590+
591+
default:
592+
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
593+
return -1;
594+
}
595+
596+
#undef create_vertex_shader_helper
597+
#undef create_pixel_shader_helper
598+
491599
auto out_width = width;
492600
auto out_height = height;
493601

@@ -676,83 +784,6 @@ namespace platf::dxgi {
676784
BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance.";
677785
}
678786

679-
#define create_vertex_shader_helper(x, y) \
680-
if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
681-
BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \
682-
return -1; \
683-
}
684-
#define create_pixel_shader_helper(x, y) \
685-
if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \
686-
BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \
687-
return -1; \
688-
}
689-
690-
switch (format) {
691-
case DXGI_FORMAT_NV12:
692-
// Semi-planar 8-bit YUV 4:2:0
693-
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
694-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
695-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
696-
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
697-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
698-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
699-
break;
700-
701-
case DXGI_FORMAT_P010:
702-
// Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value
703-
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs);
704-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps);
705-
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
706-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
707-
if (display->is_hdr()) {
708-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
709-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
710-
}
711-
else {
712-
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
713-
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
714-
}
715-
break;
716-
717-
case DXGI_FORMAT_R16_UINT:
718-
// Planar 16-bit YUV 4:4:4, 10 most significant bits store the value
719-
create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs);
720-
create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps);
721-
if (display->is_hdr()) {
722-
create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
723-
}
724-
else {
725-
create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
726-
}
727-
break;
728-
729-
case DXGI_FORMAT_AYUV:
730-
// Packed 8-bit YUV 4:4:4
731-
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
732-
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps);
733-
create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
734-
break;
735-
736-
case DXGI_FORMAT_Y410:
737-
// Packed 10-bit YUV 4:4:4
738-
create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs);
739-
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps);
740-
if (display->is_hdr()) {
741-
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps);
742-
}
743-
else {
744-
create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps);
745-
}
746-
break;
747-
748-
default:
749-
BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format";
750-
return -1;
751-
}
752-
753-
#undef create_vertex_shader_helper
754-
#undef create_pixel_shader_helper
755-
756787
auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false);
757788
if (!default_color_vectors) {
758789
BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv;
@@ -1916,6 +1947,10 @@ namespace platf::dxgi {
19161947
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear);
19171948
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer);
19181949
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs);
1950+
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps);
1951+
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear);
1952+
compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer);
1953+
compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs);
19191954
compile_pixel_shader_helper(convert_yuv420_planar_y_ps);
19201955
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear);
19211956
compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer);
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
#include "include/convert_base.hlsl"
2+
3+
#define LEFT_SUBSAMPLING_SCALE
4+
5+
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
#include "include/convert_linear_base.hlsl"
2+
3+
#define LEFT_SUBSAMPLING_SCALE
4+
5+
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
#include "include/convert_perceptual_quantizer_base.hlsl"
2+
3+
#define LEFT_SUBSAMPLING_SCALE
4+
5+
#include "include/convert_yuv420_packed_uv_ps_base.hlsl"
Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
// Constant buffer at register b0: holds the float2 subsample offset that
// main_vs halves before passing it on.
cbuffer subsample_offset_cbuffer : register(b0) {
2+
float2 subsample_offset;
3+
};
4+
5+
// Constant buffer at register b1: holds the integer rotation step count that
// main_vs forwards unchanged.
cbuffer rotate_texture_steps_cbuffer : register(b1) {
6+
int rotate_texture_steps;
7+
};
8+
9+
// Must be defined before the include: it selects the LEFT_SUBSAMPLING_SCALE
// variant of generate_fullscreen_triangle_vertex, whose second parameter is a
// float2 named halfsample_offset.
#define LEFT_SUBSAMPLING_SCALE
10+
#include "include/base_vs.hlsl"
11+
12+
// Vertex shader entry point. Takes the vertex index (SV_VertexID) and returns
// the vertex_t produced by generate_fullscreen_triangle_vertex, passing half
// of subsample_offset as the halfsample offset together with
// rotate_texture_steps.
vertex_t main_vs(uint vertex_id : SV_VertexID)
13+
{
14+
return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset / 2, rotate_texture_steps);
15+
}

src_assets/windows/assets/shaders/directx/include/base_vs.hlsl

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#if defined(LEFT_SUBSAMPLING)
44
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps)
5+
#elif defined(LEFT_SUBSAMPLING_SCALE)
6+
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 halfsample_offset, int rotate_texture_steps)
57
#elif defined(TOPLEFT_SUBSAMPLING)
68
vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps)
79
#else
@@ -34,7 +36,15 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_
3436

3537
#if defined(LEFT_SUBSAMPLING)
3638
output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y);
37-
#elif defined (TOPLEFT_SUBSAMPLING)
39+
#elif defined(LEFT_SUBSAMPLING_SCALE)
40+
float3 right_center_left = float3(tex_coord.x + halfsample_offset.x,
41+
tex_coord.x - halfsample_offset.x,
42+
tex_coord.x - 3 * halfsample_offset.x);
43+
float2 top_bottom = float2(tex_coord.y - halfsample_offset.y,
44+
tex_coord.y + halfsample_offset.y);
45+
output.tex_right_center_left_top = float4(right_center_left, top_bottom.x);
46+
output.tex_right_center_left_bottom = float4(right_center_left, top_bottom.y);
47+
#elif defined(TOPLEFT_SUBSAMPLING)
3848
output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y);
3949
output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y);
4050
#else

src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@ struct vertex_t
33
float4 viewpoint_pos : SV_Position;
44
#if defined(LEFT_SUBSAMPLING)
55
float3 tex_right_left_center : TEXCOORD;
6-
#elif defined (TOPLEFT_SUBSAMPLING)
7-
float3 tex_right_left_top : TEXCOORD;
8-
float3 tex_right_left_bottom : TEXCOORD;
6+
#elif defined(LEFT_SUBSAMPLING_SCALE)
7+
float4 tex_right_center_left_top : TEXCOORD0;
8+
float4 tex_right_center_left_bottom : TEXCOORD1;
9+
#elif defined(TOPLEFT_SUBSAMPLING)
10+
float3 tex_right_left_top : TEXCOORD0;
11+
float3 tex_right_left_bottom : TEXCOORD1;
912
#else
1013
float2 tex_coord : TEXCOORD;
1114
#endif

src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ float2 main_ps(vertex_t input) : SV_Target
1717
float3 rgb_left = image.Sample(def_sampler, input.tex_right_left_center.xz).rgb;
1818
float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb;
1919
float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5);
20+
#elif defined(LEFT_SUBSAMPLING_SCALE)
21+
float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right
22+
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center
23+
rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left
24+
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right
25+
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center
26+
rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left
27+
rgb = CONVERT_FUNCTION(rgb * (1./6));
2028
#elif defined(TOPLEFT_SUBSAMPLING)
2129
float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb;
2230
float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb;

0 commit comments

Comments
 (0)