From b3e40c1d2226b4b3ee0b1b88648b71e6f17f88be Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 27 Jan 2021 01:34:49 +0000 Subject: [PATCH 01/14] Use less expensive update for RowOctet --- .../Encoder/YCbCrForwardConverter{TPixel}.cs | 6 +- .../Jpeg/Components/GenericBlock8x8.cs | 2 +- .../Formats/Jpeg/Components/RowOctet.cs | 79 ++++++++++++++----- .../Formats/Jpeg/JpegEncoderCore.cs | 12 +-- .../Codecs/Jpeg/EncodeJpeg.cs | 2 +- .../Formats/Jpg/GenericBlock8x8Tests.cs | 4 +- 6 files changed, 74 insertions(+), 31 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs index 8fcc63c6aa..81e64b277b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs @@ -59,9 +59,9 @@ public static YCbCrForwardConverter Create() /// /// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (, , ) /// - public void Convert(ImageFrame frame, int x, int y, in RowOctet currentRows) + public void Convert(ImageFrame frame, int x, int y, ref RowOctet currentRows) { - this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, currentRows); + this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows); Span rgbSpan = this.rgbBlock.AsSpanUnsafe(); PixelOperations.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan); @@ -76,7 +76,7 @@ public void Convert(ImageFrame frame, int x, int y, in RowOctet } else { - this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); + this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock); } } } diff --git a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs b/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs index 92ba1afd35..42c01d770e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs @@ -57,7 +57,7 @@ public T this[int idx] /// Load a 8x8 region of an image into the block. /// The "outlying" area of the block will be stretched out with pixels on the right and bottom edge of the image. /// - public void LoadAndStretchEdges(Buffer2D source, int sourceX, int sourceY, in RowOctet currentRows) + public void LoadAndStretchEdges(Buffer2D source, int sourceX, int sourceY, ref RowOctet currentRows) { int width = Math.Min(8, source.Width - sourceX); int height = Math.Min(8, source.Height - sourceY); diff --git a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs index ae10bfba83..930d8b18c7 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. using System; @@ -12,18 +12,19 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// Cache 8 pixel rows on the stack, which may originate from different buffers of a . /// [StructLayout(LayoutKind.Sequential)] - internal readonly ref struct RowOctet + internal ref struct RowOctet where T : struct { - private readonly Span row0; - private readonly Span row1; - private readonly Span row2; - private readonly Span row3; - private readonly Span row4; - private readonly Span row5; - private readonly Span row6; - private readonly Span row7; + private Span row0; + private Span row1; + private Span row2; + private Span row3; + private Span row4; + private Span row5; + private Span row6; + private Span row7; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public RowOctet(Buffer2D buffer, int startY) { int y = startY; @@ -38,13 +39,12 @@ public RowOctet(Buffer2D buffer, int startY) this.row7 = y < height ? buffer.GetRowSpan(y) : default; } + // No unsafe tricks, since Span can't be used as a generic argument public Span this[int y] { - [MethodImpl(InliningOptions.ShortMethod)] - get - { - // No unsafe tricks, since Span can't be used as a generic argument - return y switch + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => + y switch { 0 => this.row0, 1 => this.row1, @@ -56,13 +56,56 @@ public Span this[int y] 7 => this.row7, _ => ThrowIndexOutOfRangeException() }; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private set + { + switch (y) + { + case 0: + this.row0 = value; + break; + case 1: + this.row1 = value; + break; + case 2: + this.row2 = value; + break; + case 3: + this.row3 = value; + break; + case 4: + this.row4 = value; + break; + case 5: + this.row5 = value; + break; + case 6: + this.row6 = value; + break; + default: + this.row7 = value; + break; + } } } - [MethodImpl(InliningOptions.ColdPath)] - private static Span ThrowIndexOutOfRangeException() + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Update(Buffer2D buffer, int startY) { - throw new IndexOutOfRangeException(); + int y = startY; + int height = buffer.Height; + + // We don't actually have to assign values outside of the + // frame pixel buffer since they are never requested. + for (int i = 0; i < 8 && y < height; i++) + { + this[i] = buffer.GetRowSpan(y++); + } } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Span ThrowIndexOutOfRangeException() + => throw new IndexOutOfRangeException(); } } diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 36766d05f0..31f2cfc3f3 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -418,15 +418,16 @@ private void Encode444(Image pixels, CancellationToken cancellat var pixelConverter = YCbCrForwardConverter.Create(); ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; + RowOctet currentRows = default; for (int y = 0; y < pixels.Height; y += 8) { cancellationToken.ThrowIfCancellationRequested(); - var currentRows = new RowOctet(pixelBuffer, y); + currentRows.Update(pixelBuffer, y); for (int x = 0; x < pixels.Width; x += 8) { - pixelConverter.Convert(frame, x, y, currentRows); + pixelConverter.Convert(frame, x, y, ref currentRows); prevDCY = this.WriteBlock( QuantIndex.Luminance, @@ -997,6 +998,7 @@ private void Encode420(Image pixels, CancellationToken cancellat int prevDCY = 0, prevDCCb = 0, prevDCCr = 0; ImageFrame frame = pixels.Frames.RootFrame; Buffer2D pixelBuffer = frame.PixelBuffer; + RowOctet currentRows = default; for (int y = 0; y < pixels.Height; y += 16) { @@ -1008,10 +1010,8 @@ private void Encode420(Image pixels, CancellationToken cancellat int xOff = (i & 1) * 8; int yOff = (i & 2) * 4; - // TODO: Try pushing this to the outer loop! - var currentRows = new RowOctet(pixelBuffer, y + yOff); - - pixelConverter.Convert(frame, x + xOff, y + yOff, currentRows); + currentRows.Update(pixelBuffer, y + yOff); + pixelConverter.Convert(frame, x + xOff, y + yOff, ref currentRows); cb[i] = pixelConverter.Cb; cr[i] = pixelConverter.Cr; diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs index bfbd150fea..81a5604f1e 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs @@ -22,7 +22,7 @@ public void ReadImages() { if (this.bmpStream == null) { - const string TestImage = TestImages.Bmp.Car; + const string TestImage = TestImages.Bmp.NegHeight; this.bmpStream = File.OpenRead(Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, TestImage)); this.bmpCore = Image.Load(this.bmpStream); this.bmpStream.Position = 0; diff --git a/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs b/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs index c366e4f56e..60449ba785 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs @@ -43,7 +43,7 @@ public void LoadAndStretchCorners_FromOrigo(TestImageProvider pr { var d = default(GenericBlock8x8); var rowOctet = new RowOctet(s.GetRootFramePixelBuffer(), 0); - d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 0, 0, rowOctet); + d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 0, 0, ref rowOctet); TPixel a = s.Frames.RootFrame[0, 0]; TPixel b = d[0, 0]; @@ -68,7 +68,7 @@ public void LoadAndStretchCorners_WithOffset(TestImageProvider p { var d = default(GenericBlock8x8); var rowOctet = new RowOctet(s.GetRootFramePixelBuffer(), 7); - d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 6, 7, rowOctet); + d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 6, 7, ref rowOctet); Assert.Equal(s[6, 7], d[0, 0]); Assert.Equal(s[6, 8], d[0, 1]); From 5398b40e021799a18a15e174149db72e30164931 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 04:10:21 +0000 Subject: [PATCH 02/14] Update RowOctet tests --- .../Formats/Jpeg/Components/RowOctet.cs | 15 --------------- .../Formats/Jpg/GenericBlock8x8Tests.cs | 7 +++++-- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs index 930d8b18c7..8234c3974e 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs @@ -24,21 +24,6 @@ internal ref struct RowOctet private Span row6; private Span row7; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public RowOctet(Buffer2D buffer, int startY) - { - int y = startY; - int height = buffer.Height; - this.row0 = y < height ? buffer.GetRowSpan(y++) : default; - this.row1 = y < height ? buffer.GetRowSpan(y++) : default; - this.row2 = y < height ? buffer.GetRowSpan(y++) : default; - this.row3 = y < height ? buffer.GetRowSpan(y++) : default; - this.row4 = y < height ? buffer.GetRowSpan(y++) : default; - this.row5 = y < height ? buffer.GetRowSpan(y++) : default; - this.row6 = y < height ? buffer.GetRowSpan(y++) : default; - this.row7 = y < height ? buffer.GetRowSpan(y) : default; - } - // No unsafe tricks, since Span can't be used as a generic argument public Span this[int y] { diff --git a/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs b/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs index 60449ba785..c0f3b6a6a4 100644 --- a/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs +++ b/tests/ImageSharp.Tests/Formats/Jpg/GenericBlock8x8Tests.cs @@ -42,7 +42,8 @@ public void LoadAndStretchCorners_FromOrigo(TestImageProvider pr using (Image s = provider.GetImage()) { var d = default(GenericBlock8x8); - var rowOctet = new RowOctet(s.GetRootFramePixelBuffer(), 0); + RowOctet rowOctet = default; + rowOctet.Update(s.GetRootFramePixelBuffer(), 0); d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 0, 0, ref rowOctet); TPixel a = s.Frames.RootFrame[0, 0]; @@ -67,7 +68,9 @@ public void LoadAndStretchCorners_WithOffset(TestImageProvider p using (Image s = provider.GetImage()) { var d = default(GenericBlock8x8); - var rowOctet = new RowOctet(s.GetRootFramePixelBuffer(), 7); + RowOctet rowOctet = default; + rowOctet.Update(s.GetRootFramePixelBuffer(), 7); + d.LoadAndStretchEdges(s.Frames.RootFrame.PixelBuffer, 6, 7, ref rowOctet); Assert.Equal(s[6, 7], d[0, 0]); From 2a4b6968b58782b07447a27ae1855e2acca59157 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 04:11:00 +0000 Subject: [PATCH 03/14] Remove bounds checks during Emit --- .../Formats/Jpeg/JpegEncoderCore.cs | 77 +++++++++++-------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs index 31f2cfc3f3..422c7bd7ec 100644 --- a/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs +++ b/src/ImageSharp/Formats/Jpeg/JpegEncoderCore.cs @@ -6,6 +6,7 @@ using System.IO; using System.Linq; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Formats.Jpeg.Components; @@ -313,7 +314,9 @@ private static void InitQuantizationTable(int i, int scale, ref Block8x8F quant) /// /// The packed bits. /// The number of bits - private void Emit(uint bits, uint count) + /// The reference to the emitBuffer. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Emit(uint bits, uint count, ref byte emitBufferBase) { count += this.bitCount; bits <<= (int)(32 - count); @@ -327,10 +330,10 @@ private void Emit(uint bits, uint count) while (count >= 8) { byte b = (byte)(bits >> 24); - this.emitBuffer[len++] = b; - if (b == 0xff) + Unsafe.Add(ref emitBufferBase, len++) = b; + if (b == byte.MaxValue) { - this.emitBuffer[len++] = 0x00; + Unsafe.Add(ref emitBufferBase, len++) = byte.MinValue; } bits <<= 8; @@ -352,11 +355,12 @@ private void Emit(uint bits, uint count) /// /// The index of the Huffman encoder /// The value to encode. + /// The reference to the emit buffer. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void EmitHuff(HuffIndex index, int value) + private void EmitHuff(HuffIndex index, int value, ref byte emitBufferBase) { uint x = HuffmanLut.TheHuffmanLut[(int)index].Values[value]; - this.Emit(x & ((1 << 24) - 1), x >> 24); + this.Emit(x & ((1 << 24) - 1), x >> 24, ref emitBufferBase); } /// @@ -365,8 +369,9 @@ private void EmitHuff(HuffIndex index, int value) /// The index of the Huffman encoder /// The number of copies to encode. /// The value to encode. + /// The reference to the emit buffer. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void EmitHuffRLE(HuffIndex index, int runLength, int value) + private void EmitHuffRLE(HuffIndex index, int runLength, int value, ref byte emitBufferBase) { int a = value; int b = value; @@ -386,10 +391,10 @@ private void EmitHuffRLE(HuffIndex index, int runLength, int value) bt = 8 + (uint)BitCountLut[a >> 8]; } - this.EmitHuff(index, (int)((uint)(runLength << 4) | bt)); + this.EmitHuff(index, (int)((uint)(runLength << 4) | bt), ref emitBufferBase); if (bt > 0) { - this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt); + this.Emit((uint)b & (uint)((1 << ((int)bt)) - 1), bt, ref emitBufferBase); } } @@ -399,7 +404,8 @@ private void EmitHuffRLE(HuffIndex index, int runLength, int value) /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - private void Encode444(Image pixels, CancellationToken cancellationToken) + /// The reference to the emit buffer. + private void Encode444(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -436,7 +442,9 @@ private void Encode444(Image pixels, CancellationToken cancellat ref temp1, ref temp2, ref onStackLuminanceQuantTable, - ref unzig); + ref unzig, + ref emitBufferBase); + prevDCCb = this.WriteBlock( QuantIndex.Chrominance, prevDCCb, @@ -444,7 +452,9 @@ private void Encode444(Image pixels, CancellationToken cancellat ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig); + ref unzig, + ref emitBufferBase); + prevDCCr = this.WriteBlock( QuantIndex.Chrominance, prevDCCr, @@ -452,7 +462,8 @@ private void Encode444(Image pixels, CancellationToken cancellat ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig); + ref unzig, + ref emitBufferBase); } } } @@ -518,9 +529,8 @@ private void WriteApplicationHeader(ImageMetadata meta) /// Temporal block 2 /// Quantization table /// The 8x8 Unzig block. - /// - /// The - /// + /// The reference to the emit buffer. + /// The . private int WriteBlock( QuantIndex index, int prevDC, @@ -528,7 +538,8 @@ private int WriteBlock( ref Block8x8F tempDest1, ref Block8x8F tempDest2, ref Block8x8F quant, - ref ZigZag unZig) + ref ZigZag unZig, + ref byte emitBufferBase) { FastFloatingPointDCT.TransformFDCT(ref src, ref tempDest1, ref tempDest2); @@ -537,7 +548,7 @@ private int WriteBlock( int dc = (int)tempDest2[0]; // Emit the DC delta. - this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC); + this.EmitHuffRLE((HuffIndex)((2 * (int)index) + 0), 0, dc - prevDC, ref emitBufferBase); // Emit the AC components. var h = (HuffIndex)((2 * (int)index) + 1); @@ -555,18 +566,18 @@ private int WriteBlock( { while (runLength > 15) { - this.EmitHuff(h, 0xf0); + this.EmitHuff(h, 0xf0, ref emitBufferBase); runLength -= 16; } - this.EmitHuffRLE(h, runLength, ac); + this.EmitHuffRLE(h, runLength, ac, ref emitBufferBase); runLength = 0; } } if (runLength > 0) { - this.EmitHuff(h, 0x00); + this.EmitHuff(h, 0x00, ref emitBufferBase); } return dc; @@ -748,9 +759,7 @@ private void WriteIptcProfile(IptcProfile iptcProfile) /// /// The length of the data the app1 marker contains. private void WriteApp1Header(int app1Length) - { - this.WriteAppHeader(app1Length, JpegConstants.Markers.APP1); - } + => this.WriteAppHeader(app1Length, JpegConstants.Markers.APP1); /// /// Writes a AppX header. @@ -954,19 +963,19 @@ private void WriteStartOfScan(Image image, CancellationToken can // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) // TODO: We should allow grayscale writing. this.outputStream.Write(SosHeaderYCbCr); - + ref byte emitBufferBase = ref MemoryMarshal.GetReference(this.emitBuffer); switch (this.subsample) { case JpegSubsample.Ratio444: - this.Encode444(image, cancellationToken); + this.Encode444(image, cancellationToken, ref emitBufferBase); break; case JpegSubsample.Ratio420: - this.Encode420(image, cancellationToken); + this.Encode420(image, cancellationToken, ref emitBufferBase); break; } // Pad the last byte with 1's. - this.Emit(0x7f, 7); + this.Emit(0x7f, 7, ref emitBufferBase); } /// @@ -976,7 +985,8 @@ private void WriteStartOfScan(Image image, CancellationToken can /// The pixel format. /// The pixel accessor providing access to the image pixels. /// The token to monitor for cancellation. - private void Encode420(Image pixels, CancellationToken cancellationToken) + /// The reference to the emit buffer. + private void Encode420(Image pixels, CancellationToken cancellationToken, ref byte emitBufferBase) where TPixel : unmanaged, IPixel { // TODO: Need a JpegScanEncoder class or struct that encapsulates the scan-encoding implementation. (Similar to JpegScanDecoder.) @@ -1023,7 +1033,8 @@ private void Encode420(Image pixels, CancellationToken cancellat ref temp1, ref temp2, ref onStackLuminanceQuantTable, - ref unzig); + ref unzig, + ref emitBufferBase); } Block8x8F.Scale16X16To8X8(ref b, cb); @@ -1034,7 +1045,8 @@ private void Encode420(Image pixels, CancellationToken cancellat ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig); + ref unzig, + ref emitBufferBase); Block8x8F.Scale16X16To8X8(ref b, cr); prevDCCr = this.WriteBlock( @@ -1044,7 +1056,8 @@ private void Encode420(Image pixels, CancellationToken cancellat ref temp1, ref temp2, ref onStackChrominanceQuantTable, - ref unzig); + ref unzig, + ref emitBufferBase); } } } From f3baa34d205c07c7027873d8e62ebcb445843c7f Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 05:46:25 +0000 Subject: [PATCH 04/14] Add AVX Block8x8F.DivideRoundAll --- .../Formats/Jpeg/Components/Block8x8F.cs | 112 +++++++++++++----- 1 file changed, 83 insertions(+), 29 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 3a29e21d9c..368fae7a52 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -51,6 +51,10 @@ internal partial struct Block8x8F : IEquatable public Vector4 V7R; #pragma warning restore SA1600 // ElementsMustBeDocumented +#if SUPPORTS_RUNTIME_INTRINSICS + private static readonly Vector NegativeOneAvx = new Vector(-1F); + private static readonly Vector OffsetAxv = new Vector(.5F); +#endif private static readonly Vector4 NegativeOne = new Vector4(-1); private static readonly Vector4 Offset = new Vector4(.5F); @@ -556,22 +560,84 @@ private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, Read [MethodImpl(InliningOptions.ShortMethod)] private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) { - a.V0L = DivideRound(a.V0L, b.V0L); - a.V0R = DivideRound(a.V0R, b.V0R); - a.V1L = DivideRound(a.V1L, b.V1L); - a.V1R = DivideRound(a.V1R, b.V1R); - a.V2L = DivideRound(a.V2L, b.V2L); - a.V2R = DivideRound(a.V2R, b.V2R); - a.V3L = DivideRound(a.V3L, b.V3L); - a.V3R = DivideRound(a.V3R, b.V3R); - a.V4L = DivideRound(a.V4L, b.V4L); - a.V4R = DivideRound(a.V4R, b.V4R); - a.V5L = DivideRound(a.V5L, b.V5L); - a.V5R = DivideRound(a.V5R, b.V5R); - a.V6L = DivideRound(a.V6L, b.V6L); - a.V6R = DivideRound(a.V6R, b.V6R); - a.V7L = DivideRound(a.V7L, b.V7L); - a.V7R = DivideRound(a.V7R, b.V7R); +#if SUPPORTS_RUNTIME_INTRINSICS + if (Avx.IsSupported) + { + Unsafe.As>(ref a.V0L) + = DivideRoundAvx(ref a.V0L, ref b.V0L); + + Unsafe.As>(ref a.V1L) + = DivideRoundAvx(ref a.V1L, ref b.V1L); + + Unsafe.As>(ref a.V2L) + = DivideRoundAvx(ref a.V2L, ref b.V2L); + + Unsafe.As>(ref a.V3L) + = DivideRoundAvx(ref a.V3L, ref b.V2L); + + Unsafe.As>(ref a.V4L) + = DivideRoundAvx(ref a.V4L, ref b.V4L); + + Unsafe.As>(ref a.V5L) + = DivideRoundAvx(ref a.V5L, ref b.V5L); + + Unsafe.As>(ref a.V6L) + = DivideRoundAvx(ref a.V6L, ref b.V6L); + + Unsafe.As>(ref a.V7L) + = DivideRoundAvx(ref a.V7L, ref b.V7L); + } + else +#endif + { + a.V0L = DivideRound(a.V0L, b.V0L); + a.V0R = DivideRound(a.V0R, b.V0R); + a.V1L = DivideRound(a.V1L, b.V1L); + a.V1R = DivideRound(a.V1R, b.V1R); + a.V2L = DivideRound(a.V2L, b.V2L); + a.V2R = DivideRound(a.V2R, b.V2R); + a.V3L = DivideRound(a.V3L, b.V3L); + a.V3R = DivideRound(a.V3R, b.V3R); + a.V4L = DivideRound(a.V4L, b.V4L); + a.V4R = DivideRound(a.V4R, b.V4R); + a.V5L = DivideRound(a.V5L, b.V5L); + a.V5R = DivideRound(a.V5R, b.V5R); + a.V6L = DivideRound(a.V6L, b.V6L); + a.V6R = DivideRound(a.V6R, b.V6R); + a.V7L = DivideRound(a.V7L, b.V7L); + a.V7R = DivideRound(a.V7R, b.V7R); + } + } + +#if SUPPORTS_RUNTIME_INTRINSICS + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector256 DivideRoundAvx( + ref Vector4 dividend, + ref Vector4 divisor) + { + Vector vdividend = Unsafe.As>(ref dividend); + + // sign(dividend) = max(min(dividend, 1), -1) + Vector offset + = Vector.Min(Vector.Max(NegativeOneAvx, vdividend), Vector.One) * OffsetAxv; + + // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend) + Vector256 v = Avx.Divide( + Unsafe.As, Vector256>(ref vdividend), + Unsafe.As>(ref divisor)); + + return Avx.Add(v, Unsafe.As, Vector256>(ref offset)); + } +#endif + + [MethodImpl(InliningOptions.ShortMethod)] + private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) + { + // sign(dividend) = max(min(dividend, 1), -1) + Vector4 sign = Numerics.Clamp(dividend, NegativeOne, Vector4.One); + + // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend) + return (dividend / divisor) + (sign * Offset); } public void RoundInto(ref Block8x8 dest) @@ -673,8 +739,7 @@ public void LoadFromInt16ExtendedAvx2(ref Block8x8 source) /// public bool Equals(Block8x8F other) - { - return this.V0L == other.V0L + => this.V0L == other.V0L && this.V0R == other.V0R && this.V1L == other.V1L && this.V1R == other.V1R @@ -690,7 +755,6 @@ public bool Equals(Block8x8F other) && this.V6R == other.V6R && this.V7L == other.V7L && this.V7R == other.V7R; - } /// public override string ToString() @@ -718,16 +782,6 @@ private static Vector NormalizeAndRound(Vector row, Vector return row.FastRound(); } - [MethodImpl(InliningOptions.ShortMethod)] - private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) - { - // sign(dividend) = max(min(dividend, 1), -1) - Vector4 sign = Numerics.Clamp(dividend, NegativeOne, Vector4.One); - - // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend) - return (dividend / divisor) + (sign * Offset); - } - [Conditional("DEBUG")] private static void GuardBlockIndex(int idx) { From 7e37d3d7b770981d3fccfd4671b24a62e2e39e83 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 06:25:56 +0000 Subject: [PATCH 05/14] Fix typo --- src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 368fae7a52..2232d4e4c4 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -573,7 +573,7 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) = DivideRoundAvx(ref a.V2L, ref b.V2L); Unsafe.As>(ref a.V3L) - = DivideRoundAvx(ref a.V3L, ref b.V2L); + = DivideRoundAvx(ref a.V3L, ref b.V3L); Unsafe.As>(ref a.V4L) = DivideRoundAvx(ref a.V4L, ref b.V4L); From dee1bfcd9cdfcdb33918fb775c06f2eae177c851 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 07:50:27 +0000 Subject: [PATCH 06/14] Remove compiler conditional for static fields --- src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 2232d4e4c4..2103769f45 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -51,10 +51,8 @@ internal partial struct Block8x8F : IEquatable public Vector4 V7R; #pragma warning restore SA1600 // ElementsMustBeDocumented -#if SUPPORTS_RUNTIME_INTRINSICS private static readonly Vector NegativeOneAvx = new Vector(-1F); private static readonly Vector OffsetAxv = new Vector(.5F); -#endif private static readonly Vector4 NegativeOne = new Vector4(-1); private static readonly Vector4 Offset = new Vector4(.5F); From ca56515b2af30a0cdd092796ae76a4e2ef604cf2 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 08:18:35 +0000 Subject: [PATCH 07/14] Disable inlining --- src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 2103769f45..342d12068d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -51,8 +51,10 @@ internal partial struct Block8x8F : IEquatable public Vector4 V7R; #pragma warning restore SA1600 // ElementsMustBeDocumented +#if SUPPORTS_RUNTIME_INTRINSICS private static readonly Vector NegativeOneAvx = new Vector(-1F); private static readonly Vector OffsetAxv = new Vector(.5F); +#endif private static readonly Vector4 NegativeOne = new Vector4(-1); private static readonly Vector4 Offset = new Vector4(.5F); @@ -608,7 +610,7 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) } #if SUPPORTS_RUNTIME_INTRINSICS - [MethodImpl(MethodImplOptions.AggressiveInlining)] + // [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 DivideRoundAvx( ref Vector4 dividend, ref Vector4 divisor) From 3ff1c27193ad62586c070c61218ae37c74c589a7 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 10:34:06 +0000 Subject: [PATCH 08/14] manually inline meythod --- .../Formats/Jpeg/Components/Block8x8F.cs | 105 +++++++++++++----- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 342d12068d..d814e50367 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -561,31 +561,99 @@ private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, Read private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) { #if SUPPORTS_RUNTIME_INTRINSICS + + // Avx version is written inline to avoid JIT bugs on MacOS. if (Avx.IsSupported) { + // V0 + Vector vs = Unsafe.As>(ref a.V0L); + Vector voff + = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) + * OffsetAxv; + + Vector256 v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V0L)); + Unsafe.As>(ref a.V0L) - = DivideRoundAvx(ref a.V0L, ref b.V0L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V1 + vs = Unsafe.As>(ref a.V1L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V1L)); Unsafe.As>(ref a.V1L) - = DivideRoundAvx(ref a.V1L, ref b.V1L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V2 + vs = Unsafe.As>(ref a.V2L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V2L)); Unsafe.As>(ref a.V2L) - = DivideRoundAvx(ref a.V2L, ref b.V2L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V3 + vs = Unsafe.As>(ref a.V3L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V3L)); Unsafe.As>(ref a.V3L) - = DivideRoundAvx(ref a.V3L, ref b.V3L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V4 + vs = Unsafe.As>(ref a.V4L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V4L)); Unsafe.As>(ref a.V4L) - = DivideRoundAvx(ref a.V4L, ref b.V4L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V5 + vs = Unsafe.As>(ref a.V5L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V5L)); Unsafe.As>(ref a.V5L) - = DivideRoundAvx(ref a.V5L, ref b.V5L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V6 + vs = Unsafe.As>(ref a.V6L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V6L)); Unsafe.As>(ref a.V6L) - = DivideRoundAvx(ref a.V6L, ref b.V6L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + + // V7 + vs = Unsafe.As>(ref a.V7L); + voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + + v = Avx.Divide( + Unsafe.As, Vector256>(ref vs), + Unsafe.As>(ref b.V7L)); Unsafe.As>(ref a.V7L) - = DivideRoundAvx(ref a.V7L, ref b.V7L); + = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); } else #endif @@ -609,27 +677,6 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) } } -#if SUPPORTS_RUNTIME_INTRINSICS - // [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 DivideRoundAvx( - ref Vector4 dividend, - ref Vector4 divisor) - { - Vector vdividend = Unsafe.As>(ref dividend); - - // sign(dividend) = max(min(dividend, 1), -1) - Vector offset - = Vector.Min(Vector.Max(NegativeOneAvx, vdividend), Vector.One) * OffsetAxv; - - // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend) - Vector256 v = Avx.Divide( - Unsafe.As, Vector256>(ref vdividend), - Unsafe.As>(ref divisor)); - - return Avx.Add(v, Unsafe.As, Vector256>(ref offset)); - } -#endif - [MethodImpl(InliningOptions.ShortMethod)] private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) { From a1042ce32bff0e6a83dc96641b5afdf152822c14 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 11:04:32 +0000 Subject: [PATCH 09/14] Try explicit layout --- src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index d814e50367..1c3a6be19b 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -18,6 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components /// /// Represents a Jpeg block with coefficients. /// + [StructLayout(LayoutKind.Sequential)] internal partial struct Block8x8F : IEquatable { /// From f87b38c2c7b0eb612a0f6c3b5acaec5fa8597c70 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 11:34:39 +0000 Subject: [PATCH 10/14] Move statics to local --- .../Formats/Jpeg/Components/Block8x8F.cs | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 1c3a6be19b..65e632cd2c 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -52,13 +52,6 @@ internal partial struct Block8x8F : IEquatable public Vector4 V7R; #pragma warning restore SA1600 // ElementsMustBeDocumented -#if SUPPORTS_RUNTIME_INTRINSICS - private static readonly Vector NegativeOneAvx = new Vector(-1F); - private static readonly Vector OffsetAxv = new Vector(.5F); -#endif - private static readonly Vector4 NegativeOne = new Vector4(-1); - private static readonly Vector4 Offset = new Vector4(.5F); - /// /// Get/Set scalar elements at a given index /// @@ -566,11 +559,14 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) // Avx version is written inline to avoid JIT bugs on MacOS. if (Avx.IsSupported) { + var vneg = new Vector(-1F); + var vadd = new Vector(.5F); + // V0 Vector vs = Unsafe.As>(ref a.V0L); Vector voff - = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) - * OffsetAxv; + = Vector.Min(Vector.Max(vneg, vs), Vector.One) + * vadd; Vector256 v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -581,7 +577,7 @@ Vector voff // V1 vs = Unsafe.As>(ref a.V1L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -592,7 +588,7 @@ Vector voff // V2 vs = Unsafe.As>(ref a.V2L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -603,7 +599,7 @@ Vector voff // V3 vs = Unsafe.As>(ref a.V3L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -614,7 +610,7 @@ Vector voff // V4 vs = Unsafe.As>(ref a.V4L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -625,7 +621,7 @@ Vector voff // V5 vs = Unsafe.As>(ref a.V5L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -636,7 +632,7 @@ Vector voff // V6 vs = Unsafe.As>(ref a.V6L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -647,7 +643,7 @@ Vector voff // V7 vs = Unsafe.As>(ref a.V7L); - voff = Vector.Min(Vector.Max(NegativeOneAvx, vs), Vector.One) * OffsetAxv; + voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; v = Avx.Divide( Unsafe.As, Vector256>(ref vs), @@ -678,14 +674,17 @@ Vector voff } } - [MethodImpl(InliningOptions.ShortMethod)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor) { + var neg = new Vector4(-1); + var add = new Vector4(.5F); + // sign(dividend) = max(min(dividend, 1), -1) - Vector4 sign = Numerics.Clamp(dividend, NegativeOne, Vector4.One); + Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One); // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend) - return (dividend / divisor) + (sign * Offset); + return (dividend / divisor) + (sign * add); } public void RoundInto(ref Block8x8 dest) From 6f367caaa48086c8ced365ee0826ddf68cd86a7c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 11:43:39 +0000 Subject: [PATCH 11/14] Update Block8x8F.cs --- .../Formats/Jpeg/Components/Block8x8F.cs | 113 ++++++------------ 1 file changed, 35 insertions(+), 78 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 65e632cd2c..30bb59f1ba 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -555,102 +555,59 @@ private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, Read private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) { #if SUPPORTS_RUNTIME_INTRINSICS - - // Avx version is written inline to avoid JIT bugs on MacOS. if (Avx.IsSupported) { - var vneg = new Vector(-1F); - var vadd = new Vector(.5F); + var vnegOne = Vector256.Create(-1f); + var vadd = Vector256.Create(.5F); + var vone = Vector256.Create(1f); // V0 - Vector vs = Unsafe.As>(ref a.V0L); - Vector voff - = Vector.Min(Vector.Max(vneg, vs), Vector.One) - * vadd; - - Vector256 v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V0L)); - - Unsafe.As>(ref a.V0L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs0 = Unsafe.As>(ref a.V0L); + Vector256 voff0 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs0), vone), vadd); + Vector256 v0 = Avx.Divide(vs0, Unsafe.As>(ref b.V0L)); + Unsafe.As>(ref a.V0L) = Avx.Add(v0, voff0); // V1 - vs = Unsafe.As>(ref a.V1L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V1L)); - - Unsafe.As>(ref a.V1L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs1 = Unsafe.As>(ref a.V1L); + Vector256 voff1 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs1), vone), vadd); + Vector256 v1 = Avx.Divide(vs1, Unsafe.As>(ref b.V1L)); + Unsafe.As>(ref a.V1L) = Avx.Add(v1, voff1); // V2 - vs = Unsafe.As>(ref a.V2L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V2L)); - - Unsafe.As>(ref a.V2L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs2 = Unsafe.As>(ref a.V2L); + Vector256 voff2 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs2), vone), vadd); + Vector256 v2 = Avx.Divide(vs2, Unsafe.As>(ref b.V2L)); + Unsafe.As>(ref a.V2L) = Avx.Add(v2, voff2); // V3 - vs = Unsafe.As>(ref a.V3L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V3L)); - - Unsafe.As>(ref a.V3L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs3 = Unsafe.As>(ref a.V3L); + Vector256 voff3 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs3), vone), vadd); + Vector256 v3 = Avx.Divide(vs3, Unsafe.As>(ref b.V3L)); + Unsafe.As>(ref a.V3L) = Avx.Add(v3, voff3); // V4 - vs = Unsafe.As>(ref a.V4L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V4L)); - - Unsafe.As>(ref a.V4L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs4 = Unsafe.As>(ref a.V4L); + Vector256 voff4 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs4), vone), vadd); + Vector256 v4 = Avx.Divide(vs4, Unsafe.As>(ref b.V4L)); + Unsafe.As>(ref a.V4L) = Avx.Add(v4, voff4); // V5 - vs = Unsafe.As>(ref a.V5L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V5L)); - - Unsafe.As>(ref a.V5L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs5 = Unsafe.As>(ref a.V5L); + Vector256 voff5 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs5), vone), vadd); + Vector256 v5 = Avx.Divide(vs5, Unsafe.As>(ref b.V5L)); + Unsafe.As>(ref a.V5L) = Avx.Add(v5, voff5); // V6 - vs = Unsafe.As>(ref a.V6L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V6L)); - - Unsafe.As>(ref a.V6L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs6 = Unsafe.As>(ref a.V6L); + Vector256 voff6 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs6), vone), vadd); + Vector256 v6 = Avx.Divide(vs6, Unsafe.As>(ref b.V6L)); + Unsafe.As>(ref a.V6L) = Avx.Add(v6, voff6); // V7 - vs = Unsafe.As>(ref a.V7L); - voff = Vector.Min(Vector.Max(vneg, vs), Vector.One) * vadd; - - v = Avx.Divide( - Unsafe.As, Vector256>(ref vs), - Unsafe.As>(ref b.V7L)); - - Unsafe.As>(ref a.V7L) - = Avx.Add(v, Unsafe.As, Vector256>(ref voff)); + Vector256 vs7 = Unsafe.As>(ref a.V7L); + Vector256 voff7 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs7), vone), vadd); + Vector256 v7 = Avx.Divide(vs7, Unsafe.As>(ref b.V7L)); + Unsafe.As>(ref a.V7L) = Avx.Add(v7, voff7); } else #endif From aa8dcd9a0950ebb7327f4f94129208f0a07e26de Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 12:29:20 +0000 Subject: [PATCH 12/14] Use a loop instead. --- .../Formats/Jpeg/Components/Block8x8F.cs | 61 +++++-------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 30bb59f1ba..87b8d8fb22 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -561,53 +561,20 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) var vadd = Vector256.Create(.5F); var vone = Vector256.Create(1f); - // V0 - Vector256 vs0 = Unsafe.As>(ref a.V0L); - Vector256 voff0 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs0), vone), vadd); - Vector256 v0 = Avx.Divide(vs0, Unsafe.As>(ref b.V0L)); - Unsafe.As>(ref a.V0L) = Avx.Add(v0, voff0); - - // V1 - Vector256 vs1 = Unsafe.As>(ref a.V1L); - Vector256 voff1 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs1), vone), vadd); - Vector256 v1 = Avx.Divide(vs1, Unsafe.As>(ref b.V1L)); - Unsafe.As>(ref a.V1L) = Avx.Add(v1, voff1); - - // V2 - Vector256 vs2 = Unsafe.As>(ref a.V2L); - Vector256 voff2 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs2), vone), vadd); - Vector256 v2 = Avx.Divide(vs2, Unsafe.As>(ref b.V2L)); - Unsafe.As>(ref a.V2L) = Avx.Add(v2, voff2); - - // V3 - Vector256 vs3 = Unsafe.As>(ref a.V3L); - Vector256 voff3 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs3), vone), vadd); - Vector256 v3 = Avx.Divide(vs3, Unsafe.As>(ref b.V3L)); - Unsafe.As>(ref a.V3L) = Avx.Add(v3, voff3); - - // V4 - Vector256 vs4 = Unsafe.As>(ref a.V4L); - Vector256 voff4 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs4), vone), vadd); - Vector256 v4 = Avx.Divide(vs4, Unsafe.As>(ref b.V4L)); - Unsafe.As>(ref a.V4L) = Avx.Add(v4, voff4); - - // V5 - Vector256 vs5 = Unsafe.As>(ref a.V5L); - Vector256 voff5 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs5), vone), vadd); - Vector256 v5 = Avx.Divide(vs5, Unsafe.As>(ref b.V5L)); - Unsafe.As>(ref a.V5L) = Avx.Add(v5, voff5); - - // V6 - Vector256 vs6 = Unsafe.As>(ref a.V6L); - Vector256 voff6 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs6), vone), vadd); - Vector256 v6 = Avx.Divide(vs6, Unsafe.As>(ref b.V6L)); - Unsafe.As>(ref a.V6L) = Avx.Add(v6, voff6); - - // V7 - Vector256 vs7 = Unsafe.As>(ref a.V7L); - Vector256 voff7 = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, vs7), vone), vadd); - Vector256 v7 = Avx.Divide(vs7, Unsafe.As>(ref b.V7L)); - Unsafe.As>(ref a.V7L) = Avx.Add(v7, voff7); + ref Vector256 aBase = ref Unsafe.AsRef(Unsafe.As>(ref a.V0L)); + ref Vector256 bBase = ref Unsafe.AsRef(Unsafe.As>(ref b.V0L)); + ref Vector256 aEnd = ref Unsafe.Add(ref aBase, 8); + + while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd)) + { + Vector256 va = Unsafe.Add(ref aBase, 0); + Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, va), vone), vadd); + Vector256 vdiv = Avx.Divide(va, Unsafe.Add(ref bBase, 0)); + Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(va, Unsafe.Add(ref bBase, 0)), voff); + + aBase = ref Unsafe.Add(ref aBase, 1); + bBase = ref Unsafe.Add(ref bBase, 1); + } } else #endif From 14226ce72ae0cbad85822575ea5abbcb5dd3e322 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 3 Feb 2021 12:59:38 +0000 Subject: [PATCH 13/14] Cleanup --- src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 87b8d8fb22..3a0fce781d 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -567,10 +567,8 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd)) { - Vector256 va = Unsafe.Add(ref aBase, 0); - Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, va), vone), vadd); - Vector256 vdiv = Avx.Divide(va, Unsafe.Add(ref bBase, 0)); - Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(va, Unsafe.Add(ref bBase, 0)), voff); + Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd); + Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff); aBase = ref Unsafe.Add(ref aBase, 1); bBase = ref Unsafe.Add(ref bBase, 1); From 461e59dc530d5ab9ad9d801bd05fc6c68e07f39d Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Thu, 4 Feb 2021 03:42:06 +0000 Subject: [PATCH 14/14] Optimize and fix warnings. --- .../Formats/Jpeg/Components/Block8x8F.cs | 17 +++++------------ .../Formats/Jpeg/Components/RowOctet.cs | 11 ++++++----- .../Block8x8F_Scale16X16To8X8.cs | 5 ++++- .../Image/ImageTests.WrapMemory.cs | 2 +- .../TestUtilities/TestEnvironment.cs | 2 +- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 3a0fce781d..ddbac2d072 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -153,10 +153,7 @@ public static Block8x8F Load(Span data) /// [MethodImpl(InliningOptions.ShortMethod)] public void Clear() - { - // The cheapest way to do this in C#: - this = default; - } + => this = default; // The cheapest way to do this in C#: /// /// Load raw 32bit floating point data from source. @@ -178,9 +175,7 @@ public void LoadFrom(Span source) /// Source [MethodImpl(InliningOptions.ShortMethod)] public static unsafe void LoadFrom(Block8x8F* blockPtr, Span source) - { - blockPtr->LoadFrom(source); - } + => blockPtr->LoadFrom(source); /// /// Load raw 32bit floating point data from source @@ -234,9 +229,7 @@ public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span dest) /// The destination. [MethodImpl(InliningOptions.ShortMethod)] public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span dest) - { - blockPtr->ScaledCopyTo(dest); - } + => blockPtr->ScaledCopyTo(dest); /// /// Copy raw 32bit floating point data to dest @@ -437,7 +430,6 @@ public void AddInPlace(float value) /// The block pointer. /// The qt pointer. /// Unzig pointer - // [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr) { float* b = (float*)blockPtr; @@ -565,7 +557,7 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) ref Vector256 bBase = ref Unsafe.AsRef(Unsafe.As>(ref b.V0L)); ref Vector256 aEnd = ref Unsafe.Add(ref aBase, 8); - while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd)) + do { Vector256 voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd); Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff); @@ -573,6 +565,7 @@ private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b) aBase = ref Unsafe.Add(ref aBase, 1); bBase = ref Unsafe.Add(ref bBase, 1); } + while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd)); } else #endif diff --git a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs index 8234c3974e..f35bb44682 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/RowOctet.cs @@ -78,14 +78,15 @@ private set [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Update(Buffer2D buffer, int startY) { - int y = startY; - int height = buffer.Height; - // We don't actually have to assign values outside of the // frame pixel buffer since they are never requested. - for (int i = 0; i < 8 && y < height; i++) + int y = startY; + int yEnd = Math.Min(y + 8, buffer.Height); + + int i = 0; + while (y < yEnd) { - this[i] = buffer.GetRowSpan(y++); + this[i++] = buffer.GetRowSpan(y++); } } diff --git a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs index 8188297608..ebd3e40130 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Scale16X16To8X8.cs @@ -1,3 +1,6 @@ +// Copyright (c) Six Labors. +// Licensed under the Apache License, Version 2.0. + using System; using BenchmarkDotNet.Attributes; using SixLabors.ImageSharp.Formats.Jpeg.Components; @@ -15,7 +18,7 @@ public void Setup() { var random = new Random(); - float[] f = new float[8*8]; + float[] f = new float[8 * 8]; for (int i = 0; i < f.Length; i++) { f[i] = (float)random.NextDouble(); diff --git a/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs b/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs index 16d0baff39..17c73cc834 100644 --- a/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs +++ b/tests/ImageSharp.Tests/Image/ImageTests.WrapMemory.cs @@ -375,7 +375,7 @@ public void WrapMemory_MemoryOfT_ValidSize(int size, int height, int width) var array = new Rgba32[size]; var memory = new Memory(array); - Image.WrapMemory(memory, height, width); + Image.WrapMemory(memory, height, width); } private class TestMemoryOwner : IMemoryOwner diff --git a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs index b80a29646c..8d1b0f7938 100644 --- a/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs +++ b/tests/ImageSharp.Tests/TestUtilities/TestEnvironment.cs @@ -263,7 +263,7 @@ static FileInfo Find(DirectoryInfo root, string name) private static string GetNetCoreVersion() { Assembly assembly = typeof(System.Runtime.GCSettings).GetTypeInfo().Assembly; - string[] assemblyPath = assembly.CodeBase.Split(new[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries); + string[] assemblyPath = assembly.Location.Split(new[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries); int netCoreAppIndex = Array.IndexOf(assemblyPath, "Microsoft.NETCore.App"); if (netCoreAppIndex > 0 && netCoreAppIndex < assemblyPath.Length - 2) {