diff --git a/src/SharpCompress/Archives/IArchiveEntryExtensions.cs b/src/SharpCompress/Archives/IArchiveEntryExtensions.cs index 3bf940351..378bed265 100644 --- a/src/SharpCompress/Archives/IArchiveEntryExtensions.cs +++ b/src/SharpCompress/Archives/IArchiveEntryExtensions.cs @@ -9,7 +9,7 @@ namespace SharpCompress.Archives; public static class IArchiveEntryExtensions { - private const int BufferSize = 81920; + private const int BufferSize = 1048576; // 1MB buffer for better disk I/O performance /// The archive entry to extract. extension(IArchiveEntry archiveEntry) @@ -135,7 +135,14 @@ public void WriteToFile(string destinationFileName, ExtractionOptions? options = options, (x, fm) => { - using var fs = File.Open(destinationFileName, fm); + // Use larger buffer for better disk I/O performance + using var fs = new FileStream( + destinationFileName, + fm, + FileAccess.Write, + FileShare.None, + bufferSize: 1048576 + ); // 1MB buffer entry.WriteTo(fs); } ); @@ -155,7 +162,15 @@ await ExtractionMethods options, async (x, fm, ct) => { - using var fs = File.Open(destinationFileName, fm); + // Use async I/O with large buffer for better performance + using var fs = new FileStream( + destinationFileName, + fm, + FileAccess.Write, + FileShare.None, + bufferSize: 1048576, + useAsync: true + ); // 1MB buffer await entry.WriteToAsync(fs, null, ct).ConfigureAwait(false); }, cancellationToken diff --git a/src/SharpCompress/Common/ExtractionMethods.cs b/src/SharpCompress/Common/ExtractionMethods.cs index 787771de9..b7dd13991 100644 --- a/src/SharpCompress/Common/ExtractionMethods.cs +++ b/src/SharpCompress/Common/ExtractionMethods.cs @@ -27,7 +27,6 @@ public static void WriteEntryToDirectory( Action write ) { - string destinationFileName; var fullDestinationDirectoryPath = Path.GetFullPath(destinationDirectory); //check for trailing slash. @@ -48,35 +47,40 @@ public static void WriteEntryToDirectory( options ??= new ExtractionOptions() { Overwrite = true }; - var file = Path.GetFileName(entry.Key.NotNull("Entry Key is null")).NotNull("File is null"); + // Cache entry.Key to avoid multiple property access + var entryKey = entry.Key.NotNull("Entry Key is null"); + var file = Path.GetFileName(entryKey).NotNull("File is null"); file = Utility.ReplaceInvalidFileNameChars(file); + + string destinationFileName; if (options.ExtractFullPath) { - var folder = Path.GetDirectoryName(entry.Key.NotNull("Entry Key is null")) - .NotNull("Directory is null"); - var destdir = Path.GetFullPath(Path.Combine(fullDestinationDirectoryPath, folder)); + var folder = Path.GetDirectoryName(entryKey).NotNull("Directory is null"); + // Combine paths first, then get full path once + destinationFileName = Path.GetFullPath( + Path.Combine(fullDestinationDirectoryPath, folder, file) + ); + + // Security check before directory creation + if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison)) + { + throw new ExtractionException( + "Entry is trying to write a file outside of the destination directory." + ); + } - if (!Directory.Exists(destdir)) + // Only create parent directory if needed (Directory.CreateDirectory is idempotent but still has overhead) + var parentDir = Path.GetDirectoryName(destinationFileName)!; + if (!Directory.Exists(parentDir)) { - if (!destdir.StartsWith(fullDestinationDirectoryPath, PathComparison)) - { - throw new ExtractionException( - "Entry is trying to create a directory outside of the destination directory." - ); - } - - Directory.CreateDirectory(destdir); + Directory.CreateDirectory(parentDir); } - destinationFileName = Path.Combine(destdir, file); } else { - destinationFileName = Path.Combine(fullDestinationDirectoryPath, file); - } - - if (!entry.IsDirectory) - { - destinationFileName = Path.GetFullPath(destinationFileName); + destinationFileName = Path.GetFullPath( + Path.Combine(fullDestinationDirectoryPath, file) + ); if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison)) { @@ -84,6 +88,10 @@ public static void WriteEntryToDirectory( "Entry is trying to write a file outside of the destination directory." ); } + } + + if (!entry.IsDirectory) + { write(destinationFileName, options); } else if (options.ExtractFullPath && !Directory.Exists(destinationFileName)) @@ -132,7 +140,6 @@ public static async ValueTask WriteEntryToDirectoryAsync( CancellationToken cancellationToken = default ) { - string destinationFileName; var fullDestinationDirectoryPath = Path.GetFullPath(destinationDirectory); //check for trailing slash. @@ -153,35 +160,40 @@ public static async ValueTask WriteEntryToDirectoryAsync( options ??= new ExtractionOptions() { Overwrite = true }; - var file = Path.GetFileName(entry.Key.NotNull("Entry Key is null")).NotNull("File is null"); + // Cache entry.Key to avoid multiple property access + var entryKey = entry.Key.NotNull("Entry Key is null"); + var file = Path.GetFileName(entryKey).NotNull("File is null"); file = Utility.ReplaceInvalidFileNameChars(file); + + string destinationFileName; if (options.ExtractFullPath) { - var folder = Path.GetDirectoryName(entry.Key.NotNull("Entry Key is null")) - .NotNull("Directory is null"); - var destdir = Path.GetFullPath(Path.Combine(fullDestinationDirectoryPath, folder)); + var folder = Path.GetDirectoryName(entryKey).NotNull("Directory is null"); + // Combine paths first, then get full path once + destinationFileName = Path.GetFullPath( + Path.Combine(fullDestinationDirectoryPath, folder, file) + ); + + // Security check before directory creation + if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison)) + { + throw new ExtractionException( + "Entry is trying to write a file outside of the destination directory." + ); + } - if (!Directory.Exists(destdir)) + // Only create parent directory if needed + var parentDir = Path.GetDirectoryName(destinationFileName)!; + if (!Directory.Exists(parentDir)) { - if (!destdir.StartsWith(fullDestinationDirectoryPath, PathComparison)) - { - throw new ExtractionException( - "Entry is trying to create a directory outside of the destination directory." - ); - } - - Directory.CreateDirectory(destdir); + Directory.CreateDirectory(parentDir); } - destinationFileName = Path.Combine(destdir, file); } else { - destinationFileName = Path.Combine(fullDestinationDirectoryPath, file); - } - - if (!entry.IsDirectory) - { - destinationFileName = Path.GetFullPath(destinationFileName); + destinationFileName = Path.GetFullPath( + Path.Combine(fullDestinationDirectoryPath, file) + ); if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison)) { @@ -189,6 +201,10 @@ public static async ValueTask WriteEntryToDirectoryAsync( "Entry is trying to write a file outside of the destination directory." ); } + } + + if (!entry.IsDirectory) + { await writeAsync(destinationFileName, options, cancellationToken).ConfigureAwait(false); } else if (options.ExtractFullPath && !Directory.Exists(destinationFileName)) diff --git a/src/SharpCompress/Compressors/LZMA/LzmaDecoder.cs b/src/SharpCompress/Compressors/LZMA/LzmaDecoder.cs index 0fb39626f..15fdc152a 100644 --- a/src/SharpCompress/Compressors/LZMA/LzmaDecoder.cs +++ b/src/SharpCompress/Compressors/LZMA/LzmaDecoder.cs @@ -3,6 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Runtime.CompilerServices; using System.Threading.Tasks; using SharpCompress.Compressors.LZMA.LZ; using SharpCompress.Compressors.LZMA.RangeCoder; @@ -42,6 +43,7 @@ public void Init() _highCoder.Init(); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint Decode(RangeCoder.Decoder rangeDecoder, uint posState) { if (_choice.Decode(rangeDecoder) == 0) @@ -78,6 +80,7 @@ public void Init() } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public byte DecodeNormal(RangeCoder.Decoder rangeDecoder) { uint symbol = 1; @@ -88,6 +91,7 @@ public byte DecodeNormal(RangeCoder.Decoder rangeDecoder) return (byte)symbol; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public byte DecodeWithMatchByte(RangeCoder.Decoder rangeDecoder, byte matchByte) { uint symbol = 1; @@ -141,12 +145,15 @@ public void Init() } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private uint GetState(uint pos, byte prevByte) => ((pos & _posMask) << _numPrevBits) + (uint)(prevByte >> (8 - _numPrevBits)); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public byte DecodeNormal(RangeCoder.Decoder rangeDecoder, uint pos, byte prevByte) => _coders[GetState(pos, prevByte)].DecodeNormal(rangeDecoder); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public byte DecodeWithMatchByte( RangeCoder.Decoder rangeDecoder, uint pos, diff --git a/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoder.cs b/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoder.cs index 23b3b0b9d..008803ad9 100644 --- a/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoder.cs +++ b/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoder.cs @@ -164,8 +164,10 @@ public void Normalize2() } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint GetThreshold(uint total) => _code / (_range /= total); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Decode(uint start, uint size) { _code -= start * _range; @@ -173,6 +175,7 @@ public void Decode(uint start, uint size) Normalize(); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint DecodeDirectBits(int numTotalBits) { var range = _range; @@ -205,6 +208,7 @@ public uint DecodeDirectBits(int numTotalBits) return result; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint DecodeBit(uint size0, int numTotalBits) { var newBound = (_range >> numTotalBits) * size0; diff --git a/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.cs b/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.cs index 562b2356d..8923ab81d 100644 --- a/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.cs +++ b/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.cs @@ -1,3 +1,7 @@ +#nullable disable + +using System.Runtime.CompilerServices; + namespace SharpCompress.Compressors.LZMA.RangeCoder; internal struct BitEncoder @@ -100,6 +104,7 @@ public void UpdateModel(int numMoveBits, uint symbol) public void Init() => _prob = K_BIT_MODEL_TOTAL >> 1; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint Decode(Decoder rangeDecoder) { var newBound = (rangeDecoder._range >> K_NUM_BIT_MODEL_TOTAL_BITS) * _prob; diff --git a/src/SharpCompress/IO/BufferedSubStream.cs b/src/SharpCompress/IO/BufferedSubStream.cs index 1cfc1867b..804518bbb 100755 --- a/src/SharpCompress/IO/BufferedSubStream.cs +++ b/src/SharpCompress/IO/BufferedSubStream.cs @@ -143,6 +143,70 @@ public override int Read(byte[] buffer, int offset, int count) return count; } + /// + /// Fast skip operation that minimizes cache refills and uses larger reads. + /// Used by StreamExtensions.Skip to efficiently skip large amounts of data. + /// For non-seekable streams like LZMA, we must still read (decompress) the data, + /// but we can do it in very large chunks to minimize overhead. + /// + internal void SkipInternal(long advanceAmount) + { + if (advanceAmount <= 0) + { + return; + } + + // First, skip what's already in cache (free) + var inCache = _cacheLength - _cacheOffset; + if (inCache > 0) + { + var skipFromCache = (int)Math.Min(advanceAmount, inCache); + _cacheOffset += skipFromCache; + advanceAmount -= skipFromCache; + } + + if (advanceAmount == 0) + { + return; + } + + // For remaining data, we must actually read it from the underlying stream + // Use very large reads to minimize LZMA decompression call overhead + var skipBuffer = ArrayPool.Shared.Rent(1048576); // 1MB for skipping + try + { + while (advanceAmount > 0 && BytesLeftToRead > 0) + { + var toRead = (int) + Math.Min(Math.Min(advanceAmount, BytesLeftToRead), skipBuffer.Length); + + // Only seek if we're not already at the correct position + if (Stream.CanSeek && Stream.Position != origin) + { + Stream.Position = origin; + } + + var read = Stream.Read(skipBuffer, 0, toRead); + if (read == 0) + { + break; + } + + origin += read; + BytesLeftToRead -= read; + advanceAmount -= read; + } + + // Invalidate cache since we skipped past it + _cacheOffset = 0; + _cacheLength = 0; + } + finally + { + ArrayPool.Shared.Return(skipBuffer); + } + } + public override int ReadByte() { if (_cacheOffset == _cacheLength) diff --git a/src/SharpCompress/Polyfills/StreamExtensions.cs b/src/SharpCompress/Polyfills/StreamExtensions.cs index 6803e1e03..94f67ff8d 100644 --- a/src/SharpCompress/Polyfills/StreamExtensions.cs +++ b/src/SharpCompress/Polyfills/StreamExtensions.cs @@ -18,8 +18,34 @@ public void Skip(long advanceAmount) return; } - using var readOnlySubStream = new IO.ReadOnlySubStream(stream, advanceAmount); - readOnlySubStream.CopyTo(Stream.Null); + // For BufferedSubStream, use internal fast skip to avoid multiple cache refills + if (stream is IO.BufferedSubStream bufferedSubStream) + { + bufferedSubStream.SkipInternal(advanceAmount); + return; + } + + // Use a very large buffer (1MB) to minimize Read() calls when skipping + // This is critical for solid 7zip archives with LZMA compression + var buffer = ArrayPool.Shared.Rent(1048576); // 1MB buffer + try + { + long remaining = advanceAmount; + while (remaining > 0) + { + var toRead = (int)Math.Min(remaining, buffer.Length); + var read = stream.Read(buffer, 0, toRead); + if (read == 0) + { + break; // End of stream + } + remaining -= read; + } + } + finally + { + ArrayPool.Shared.Return(buffer); + } } public void Skip() => stream.CopyTo(Stream.Null); diff --git a/src/SharpCompress/Readers/AbstractReader.cs b/src/SharpCompress/Readers/AbstractReader.cs index 123576c29..5d53b8b03 100644 --- a/src/SharpCompress/Readers/AbstractReader.cs +++ b/src/SharpCompress/Readers/AbstractReader.cs @@ -339,7 +339,7 @@ internal void Write(Stream writeStream) { using Stream s = OpenEntryStream(); var sourceStream = WrapWithProgress(s, Entry); - sourceStream.CopyTo(writeStream, 81920); + sourceStream.CopyTo(writeStream, 1048576); // 1MB buffer } internal async ValueTask WriteAsync(Stream writeStream, CancellationToken cancellationToken) @@ -347,11 +347,15 @@ internal async ValueTask WriteAsync(Stream writeStream, CancellationToken cancel #if LEGACY_DOTNET using Stream s = await OpenEntryStreamAsync(cancellationToken).ConfigureAwait(false); var sourceStream = WrapWithProgress(s, Entry); - await sourceStream.CopyToAsync(writeStream, 81920, cancellationToken).ConfigureAwait(false); + await sourceStream + .CopyToAsync(writeStream, 1048576, cancellationToken) + .ConfigureAwait(false); // 1MB buffer #else await using Stream s = await OpenEntryStreamAsync(cancellationToken).ConfigureAwait(false); var sourceStream = WrapWithProgress(s, Entry); - await sourceStream.CopyToAsync(writeStream, 81920, cancellationToken).ConfigureAwait(false); + await sourceStream + .CopyToAsync(writeStream, 1048576, cancellationToken) + .ConfigureAwait(false); // 1MB buffer #endif }