Skip to content

Commit cf79e44

Browse files
authored
Merge branch 'master' into bp/reduceallocations
2 parents 853b117 + 49bd35c commit cf79e44

File tree

3 files changed: 90 additions (+90) and 23 deletions (-23) lines changed

src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs

Lines changed: 43 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -564,6 +564,7 @@ public static void PredictorInverseTransform(
564564
int mask = tileWidth - 1;
565565
int tilesPerRow = SubSampleSize(width, transform.Bits);
566566
int predictorModeIdxBase = (y >> transform.Bits) * tilesPerRow;
567+
Span<short> scratch = stackalloc short[8];
567568
while (y < yEnd)
568569
{
569570
int predictorModeIdx = predictorModeIdxBase;
@@ -621,7 +622,7 @@ public static void PredictorInverseTransform(
621622
PredictorAdd10(input + x, output + x - width, xEnd - x, output + x);
622623
break;
623624
case 11:
624-
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x);
625+
PredictorAdd11(input + x, output + x - width, xEnd - x, output + x, scratch);
625626
break;
626627
case 12:
627628
PredictorAdd12(input + x, output + x - width, xEnd - x, output + x);
@@ -987,11 +988,11 @@ private static void PredictorAdd10(uint* input, uint* upper, int numberOfPixels,
987988
}
988989

989990
[MethodImpl(InliningOptions.ShortMethod)]
990-
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output)
991+
private static void PredictorAdd11(uint* input, uint* upper, int numberOfPixels, uint* output, Span<short> scratch)
991992
{
992993
for (int x = 0; x < numberOfPixels; x++)
993994
{
994-
uint pred = Predictor11(output[x - 1], upper + x);
995+
uint pred = Predictor11(output[x - 1], upper + x, scratch);
995996
output[x] = AddPixels(input[x], pred);
996997
}
997998
}
@@ -1044,7 +1045,7 @@ private static void PredictorAdd13(uint* input, uint* upper, int numberOfPixels,
10441045
public static uint Predictor10(uint left, uint* top) => Average4(left, top[-1], top[0], top[1]);
10451046

10461047
[MethodImpl(InliningOptions.ShortMethod)]
1047-
public static uint Predictor11(uint left, uint* top) => Select(top[0], left, top[-1]);
1048+
public static uint Predictor11(uint left, uint* top, Span<short> scratch) => Select(top[0], left, top[-1], scratch);
10481049

10491050
[MethodImpl(InliningOptions.ShortMethod)]
10501051
public static uint Predictor12(uint left, uint* top) => ClampedAddSubtractFull(left, top[0], top[-1]);
@@ -1161,11 +1162,11 @@ public static void PredictorSub10(uint* input, uint* upper, int numPixels, uint*
11611162
}
11621163

11631164
[MethodImpl(InliningOptions.ShortMethod)]
1164-
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output)
1165+
public static void PredictorSub11(uint* input, uint* upper, int numPixels, uint* output, Span<short> scratch)
11651166
{
11661167
for (int x = 0; x < numPixels; x++)
11671168
{
1168-
uint pred = Predictor11(input[x - 1], upper + x);
1169+
uint pred = Predictor11(input[x - 1], upper + x, scratch);
11691170
output[x] = SubPixels(input[x], pred);
11701171
}
11711172
}
@@ -1253,14 +1254,43 @@ private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
12531254
private static Vector128<int> MkCst16(int hi, int lo) => Vector128.Create((hi << 16) | (lo & 0xffff));
12541255
#endif
12551256

1256-
private static uint Select(uint a, uint b, uint c)
1257+
private static uint Select(uint a, uint b, uint c, Span<short> scratch)
12571258
{
1258-
int paMinusPb =
1259-
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
1260-
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
1261-
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
1262-
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
1263-
return paMinusPb <= 0 ? a : b;
1259+
#if SUPPORTS_RUNTIME_INTRINSICS
1260+
if (Sse2.IsSupported)
1261+
{
1262+
Span<short> output = scratch;
1263+
fixed (short* p = output)
1264+
{
1265+
Vector128<byte> a0 = Sse2.ConvertScalarToVector128UInt32(a).AsByte();
1266+
Vector128<byte> b0 = Sse2.ConvertScalarToVector128UInt32(b).AsByte();
1267+
Vector128<byte> c0 = Sse2.ConvertScalarToVector128UInt32(c).AsByte();
1268+
Vector128<byte> ac0 = Sse2.SubtractSaturate(a0, c0);
1269+
Vector128<byte> ca0 = Sse2.SubtractSaturate(c0, a0);
1270+
Vector128<byte> bc0 = Sse2.SubtractSaturate(b0, c0);
1271+
Vector128<byte> cb0 = Sse2.SubtractSaturate(c0, b0);
1272+
Vector128<byte> ac = Sse2.Or(ac0, ca0);
1273+
Vector128<byte> bc = Sse2.Or(bc0, cb0);
1274+
Vector128<byte> pa = Sse2.UnpackLow(ac, Vector128<byte>.Zero); // |a - c|
1275+
Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
1276+
Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
1277+
Sse2.Store((ushort*)p, diff);
1278+
}
1279+
1280+
int paMinusPb = output[0] + output[1] + output[2] + output[3];
1281+
1282+
return (paMinusPb <= 0) ? a : b;
1283+
}
1284+
else
1285+
#endif
1286+
{
1287+
int paMinusPb =
1288+
Sub3((int)(a >> 24), (int)(b >> 24), (int)(c >> 24)) +
1289+
Sub3((int)((a >> 16) & 0xff), (int)((b >> 16) & 0xff), (int)((c >> 16) & 0xff)) +
1290+
Sub3((int)((a >> 8) & 0xff), (int)((b >> 8) & 0xff), (int)((c >> 8) & 0xff)) +
1291+
Sub3((int)(a & 0xff), (int)(b & 0xff), (int)(c & 0xff));
1292+
return paMinusPb <= 0 ? a : b;
1293+
}
12641294
}
12651295

12661296
[MethodImpl(InliningOptions.ShortMethod)]

src/ImageSharp/Formats/Webp/Lossless/PredictorEncoder.cs

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ public static void ResidualImage(
7676
int tilesPerRow = LosslessUtils.SubSampleSize(width, bits);
7777
int tilesPerCol = LosslessUtils.SubSampleSize(height, bits);
7878
int maxQuantization = 1 << LosslessUtils.NearLosslessBits(nearLosslessQuality);
79+
Span<short> scratch = stackalloc short[8];
7980

8081
// TODO: Can we optimize this?
8182
int[][] histo = new int[4][];
@@ -112,7 +113,8 @@ public static void ResidualImage(
112113
transparentColorMode,
113114
usedSubtractGreen,
114115
nearLossless,
115-
image);
116+
image,
117+
scratch);
116118

117119
image[(tileY * tilesPerRow) + tileX] = (uint)(WebpConstants.ArgbBlack | (pred << 8));
118120
}
@@ -223,7 +225,8 @@ private static int GetBestPredictorForTile(
223225
WebpTransparentColorMode transparentColorMode,
224226
bool usedSubtractGreen,
225227
bool nearLossless,
226-
Span<uint> modes)
228+
Span<uint> modes,
229+
Span<short> scratch)
227230
{
228231
const int numPredModes = 14;
229232
int startX = tileX << bits;
@@ -296,7 +299,7 @@ private static int GetBestPredictorForTile(
296299
}
297300
}
298301

299-
GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals);
302+
GetResidual(width, height, upperRow, currentRow, maxDiffs, mode, startX, startX + maxX, y, maxQuantization, transparentColorMode, usedSubtractGreen, nearLossless, residuals, scratch);
300303
for (int relativeX = 0; relativeX < maxX; ++relativeX)
301304
{
302305
UpdateHisto(histoArgb, residuals[relativeX]);
@@ -362,11 +365,12 @@ private static void GetResidual(
362365
WebpTransparentColorMode transparentColorMode,
363366
bool usedSubtractGreen,
364367
bool nearLossless,
365-
Span<uint> output)
368+
Span<uint> output,
369+
Span<short> scratch)
366370
{
367371
if (transparentColorMode == WebpTransparentColorMode.Preserve)
368372
{
369-
PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output);
373+
PredictBatch(mode, xStart, y, xEnd - xStart, currentRowSpan, upperRowSpan, output, scratch);
370374
}
371375
else
372376
{
@@ -424,7 +428,7 @@ private static void GetResidual(
424428
predict = LosslessUtils.Predictor10(currentRow[x - 1], upperRow + x);
425429
break;
426430
case 11:
427-
predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x);
431+
predict = LosslessUtils.Predictor11(currentRow[x - 1], upperRow + x, scratch);
428432
break;
429433
case 12:
430434
predict = LosslessUtils.Predictor12(currentRow[x - 1], upperRow + x);
@@ -612,6 +616,7 @@ private static void CopyImageWithPrediction(
612616
Span<byte> currentMaxDiffs = MemoryMarshal.Cast<uint, byte>(currentRow.Slice(width + 1));
613617

614618
Span<byte> lowerMaxDiffs = currentMaxDiffs.Slice(width);
619+
Span<short> scratch = stackalloc short[8];
615620
for (int y = 0; y < height; y++)
616621
{
617622
Span<uint> tmp32 = upperRow;
@@ -622,7 +627,7 @@ private static void CopyImageWithPrediction(
622627

623628
if (lowEffort)
624629
{
625-
PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width));
630+
PredictBatch(PredLowEffort, 0, y, width, currentRow, upperRow, argb.Slice(y * width), scratch);
626631
}
627632
else
628633
{
@@ -663,7 +668,8 @@ private static void CopyImageWithPrediction(
663668
transparentColorMode,
664669
usedSubtractGreen,
665670
nearLossless,
666-
argb.Slice((y * width) + x));
671+
argb.Slice((y * width) + x),
672+
scratch);
667673

668674
x = xEnd;
669675
}
@@ -678,7 +684,8 @@ private static void PredictBatch(
678684
int numPixels,
679685
Span<uint> currentSpan,
680686
Span<uint> upperSpan,
681-
Span<uint> outputSpan)
687+
Span<uint> outputSpan,
688+
Span<short> scratch)
682689
{
683690
#pragma warning disable SA1503 // Braces should not be omitted
684691
fixed (uint* current = currentSpan)
@@ -747,7 +754,7 @@ private static void PredictBatch(
747754
LosslessUtils.PredictorSub10(current + xStart, upper + xStart, numPixels, output);
748755
break;
749756
case 11:
750-
LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output);
757+
LosslessUtils.PredictorSub11(current + xStart, upper + xStart, numPixels, output, scratch);
751758
break;
752759
case 12:
753760
LosslessUtils.PredictorSub12(current + xStart, upper + xStart, numPixels, output);

tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,30 @@ private static void RunTransformColorInverseTest()
132132
Assert.Equal(expectedOutput, pixelData);
133133
}
134134

135+
private static void RunPredictor11Test()
136+
{
137+
// arrange
138+
uint[] topData = { 4278258949, 4278258949 };
139+
uint left = 4294839812;
140+
short[] scratch = new short[8];
141+
uint expectedResult = 4294839812;
142+
143+
// act
144+
unsafe
145+
{
146+
fixed (uint* top = &topData[1])
147+
{
148+
uint actual = LosslessUtils.Predictor11(left, top, scratch);
149+
150+
// assert
151+
Assert.Equal(expectedResult, actual);
152+
}
153+
}
154+
}
155+
156+
[Fact]
157+
public void Predictor11_Works() => RunPredictor11Test();
158+
135159
[Fact]
136160
public void SubtractGreen_Works() => RunSubtractGreenTest();
137161

@@ -145,6 +169,12 @@ private static void RunTransformColorInverseTest()
145169
public void TransformColorInverse_Works() => RunTransformColorInverseTest();
146170

147171
#if SUPPORTS_RUNTIME_INTRINSICS
172+
[Fact]
173+
public void Predictor11_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.AllowAll);
174+
175+
[Fact]
176+
public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);
177+
148178
[Fact]
149179
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);
150180

0 commit comments

Comments
 (0)