Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions src/SharpCompress/Archives/IArchiveEntryExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace SharpCompress.Archives;

public static class IArchiveEntryExtensions
{
private const int BufferSize = 81920;
private const int BufferSize = 1048576; // 1MB buffer for better disk I/O performance
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting BufferSize to 1MB affects CopyTo/CopyToAsync below, which will allocate a new 1MB byte[] per call (LOH) in the framework implementation. This can significantly increase GC pressure for archives with many (especially small) entries. Consider keeping the CopyTo buffer below the LOH threshold or using a pooled-buffer copy implementation so the larger buffer doesn’t allocate per extraction call.

Suggested change
private const int BufferSize = 1048576; // 1MB buffer for better disk I/O performance
private const int BufferSize = 81920; // 80KB buffer to avoid LOH allocations while maintaining good I/O performance

Copilot uses AI. Check for mistakes.

/// <param name="archiveEntry">The archive entry to extract.</param>
extension(IArchiveEntry archiveEntry)
Expand Down Expand Up @@ -135,7 +135,14 @@ public void WriteToFile(string destinationFileName, ExtractionOptions? options =
options,
(x, fm) =>
{
using var fs = File.Open(destinationFileName, fm);
// Use larger buffer for better disk I/O performance
using var fs = new FileStream(
destinationFileName,
fm,
FileAccess.Write,
FileShare.None,
bufferSize: 1048576
); // 1MB buffer
Comment on lines +139 to +145
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These FileStream constructors set bufferSize to 1MB, which typically causes a 1MB managed buffer allocation per extracted file (often LOH) in addition to the CopyTo buffer. This can lead to large transient allocations/LOH fragmentation when extracting many files. Consider relying on the default FileStream buffering or making the larger FileStream buffer size conditional/opt-in (and ideally reuse pooled buffers where possible).

Copilot uses AI. Check for mistakes.
Comment on lines +139 to +145
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 1MB literal buffer size is duplicated here even though the file already defines BufferSize. Using the constant would prevent accidental divergence if the buffer size is tuned later.

Copilot uses AI. Check for mistakes.
entry.WriteTo(fs);
}
);
Expand All @@ -155,7 +162,15 @@ await ExtractionMethods
options,
async (x, fm, ct) =>
{
using var fs = File.Open(destinationFileName, fm);
// Use async I/O with large buffer for better performance
using var fs = new FileStream(
destinationFileName,
fm,
FileAccess.Write,
FileShare.None,
bufferSize: 1048576,
useAsync: true
); // 1MB buffer
await entry.WriteToAsync(fs, null, ct).ConfigureAwait(false);
},
cancellationToken
Expand Down
100 changes: 58 additions & 42 deletions src/SharpCompress/Common/ExtractionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ public static void WriteEntryToDirectory(
Action<string, ExtractionOptions?> write
)
{
string destinationFileName;
var fullDestinationDirectoryPath = Path.GetFullPath(destinationDirectory);

//check for trailing slash.
Expand All @@ -48,42 +47,51 @@ public static void WriteEntryToDirectory(

options ??= new ExtractionOptions() { Overwrite = true };

var file = Path.GetFileName(entry.Key.NotNull("Entry Key is null")).NotNull("File is null");
// Cache entry.Key to avoid multiple property access
var entryKey = entry.Key.NotNull("Entry Key is null");
var file = Path.GetFileName(entryKey).NotNull("File is null");
file = Utility.ReplaceInvalidFileNameChars(file);

string destinationFileName;
if (options.ExtractFullPath)
{
var folder = Path.GetDirectoryName(entry.Key.NotNull("Entry Key is null"))
.NotNull("Directory is null");
var destdir = Path.GetFullPath(Path.Combine(fullDestinationDirectoryPath, folder));
var folder = Path.GetDirectoryName(entryKey).NotNull("Directory is null");
// Combine paths first, then get full path once
destinationFileName = Path.GetFullPath(
Path.Combine(fullDestinationDirectoryPath, folder, file)
);

// Security check before directory creation
if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison))
{
throw new ExtractionException(
"Entry is trying to write a file outside of the destination directory."
);
}

if (!Directory.Exists(destdir))
// Only create parent directory if needed (Directory.CreateDirectory is idempotent but still has overhead)
var parentDir = Path.GetDirectoryName(destinationFileName)!;
if (!Directory.Exists(parentDir))
{
if (!destdir.StartsWith(fullDestinationDirectoryPath, PathComparison))
{
throw new ExtractionException(
"Entry is trying to create a directory outside of the destination directory."
);
}

Directory.CreateDirectory(destdir);
Directory.CreateDirectory(parentDir);
}
destinationFileName = Path.Combine(destdir, file);
}
else
{
destinationFileName = Path.Combine(fullDestinationDirectoryPath, file);
}

if (!entry.IsDirectory)
{
destinationFileName = Path.GetFullPath(destinationFileName);
destinationFileName = Path.GetFullPath(
Path.Combine(fullDestinationDirectoryPath, file)
);

if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison))
{
throw new ExtractionException(
"Entry is trying to write a file outside of the destination directory."
);
}
}

if (!entry.IsDirectory)
{
write(destinationFileName, options);
}
else if (options.ExtractFullPath && !Directory.Exists(destinationFileName))
Expand Down Expand Up @@ -132,7 +140,6 @@ public static async ValueTask WriteEntryToDirectoryAsync(
CancellationToken cancellationToken = default
)
{
string destinationFileName;
var fullDestinationDirectoryPath = Path.GetFullPath(destinationDirectory);

//check for trailing slash.
Expand All @@ -153,42 +160,51 @@ public static async ValueTask WriteEntryToDirectoryAsync(

options ??= new ExtractionOptions() { Overwrite = true };

var file = Path.GetFileName(entry.Key.NotNull("Entry Key is null")).NotNull("File is null");
// Cache entry.Key to avoid multiple property access
var entryKey = entry.Key.NotNull("Entry Key is null");
var file = Path.GetFileName(entryKey).NotNull("File is null");
file = Utility.ReplaceInvalidFileNameChars(file);

string destinationFileName;
if (options.ExtractFullPath)
{
var folder = Path.GetDirectoryName(entry.Key.NotNull("Entry Key is null"))
.NotNull("Directory is null");
var destdir = Path.GetFullPath(Path.Combine(fullDestinationDirectoryPath, folder));
var folder = Path.GetDirectoryName(entryKey).NotNull("Directory is null");
// Combine paths first, then get full path once
destinationFileName = Path.GetFullPath(
Path.Combine(fullDestinationDirectoryPath, folder, file)
);

// Security check before directory creation
if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison))
{
throw new ExtractionException(
"Entry is trying to write a file outside of the destination directory."
);
}

if (!Directory.Exists(destdir))
// Only create parent directory if needed
var parentDir = Path.GetDirectoryName(destinationFileName)!;
if (!Directory.Exists(parentDir))
{
if (!destdir.StartsWith(fullDestinationDirectoryPath, PathComparison))
{
throw new ExtractionException(
"Entry is trying to create a directory outside of the destination directory."
);
}

Directory.CreateDirectory(destdir);
Directory.CreateDirectory(parentDir);
}
destinationFileName = Path.Combine(destdir, file);
}
else
{
destinationFileName = Path.Combine(fullDestinationDirectoryPath, file);
}

if (!entry.IsDirectory)
{
destinationFileName = Path.GetFullPath(destinationFileName);
destinationFileName = Path.GetFullPath(
Path.Combine(fullDestinationDirectoryPath, file)
);

if (!destinationFileName.StartsWith(fullDestinationDirectoryPath, PathComparison))
{
throw new ExtractionException(
"Entry is trying to write a file outside of the destination directory."
);
}
}

if (!entry.IsDirectory)
{
await writeAsync(destinationFileName, options, cancellationToken).ConfigureAwait(false);
}
else if (options.ExtractFullPath && !Directory.Exists(destinationFileName))
Expand Down
7 changes: 7 additions & 0 deletions src/SharpCompress/Compressors/LZMA/LzmaDecoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
using SharpCompress.Compressors.LZMA.LZ;
using SharpCompress.Compressors.LZMA.RangeCoder;
Expand Down Expand Up @@ -42,6 +43,7 @@ public void Init()
_highCoder.Init();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint Decode(RangeCoder.Decoder rangeDecoder, uint posState)
{
if (_choice.Decode(rangeDecoder) == 0)
Expand Down Expand Up @@ -78,6 +80,7 @@ public void Init()
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte DecodeNormal(RangeCoder.Decoder rangeDecoder)
{
uint symbol = 1;
Expand All @@ -88,6 +91,7 @@ public byte DecodeNormal(RangeCoder.Decoder rangeDecoder)
return (byte)symbol;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte DecodeWithMatchByte(RangeCoder.Decoder rangeDecoder, byte matchByte)
{
uint symbol = 1;
Expand Down Expand Up @@ -141,12 +145,15 @@ public void Init()
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
// Computes the literal-coder context index by combining the low bits of the
// stream position (masked by _posMask) with the high (8 - _numPrevBits) bits
// of the previous byte. Hot path — kept inlined.
private uint GetState(uint pos, byte prevByte) =>
((pos & _posMask) << _numPrevBits) + (uint)(prevByte >> (8 - _numPrevBits));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
// Decodes one literal byte using the per-context coder selected by
// GetState(pos, prevByte). Hot path — kept inlined.
public byte DecodeNormal(RangeCoder.Decoder rangeDecoder, uint pos, byte prevByte) =>
_coders[GetState(pos, prevByte)].DecodeNormal(rangeDecoder);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte DecodeWithMatchByte(
RangeCoder.Decoder rangeDecoder,
uint pos,
Expand Down
4 changes: 4 additions & 0 deletions src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,18 @@ public void Normalize2()
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
// NOTE: this has a deliberate side effect — it divides _range by total in place
// (the caller is expected to follow up with Decode(start, size)).
public uint GetThreshold(uint total) => _code / (_range /= total);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
// Narrows the range coder's interval to the sub-interval [start, start + size)
// (in units of the current _range, as pre-divided by GetThreshold), then
// renormalizes. Hot path — kept inlined.
public void Decode(uint start, uint size)
{
_code -= start * _range;
_range *= size;
Normalize();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint DecodeDirectBits(int numTotalBits)
{
var range = _range;
Expand Down Expand Up @@ -205,6 +208,7 @@ public uint DecodeDirectBits(int numTotalBits)
return result;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint DecodeBit(uint size0, int numTotalBits)
{
var newBound = (_range >> numTotalBits) * size0;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#nullable disable

using System.Runtime.CompilerServices;

namespace SharpCompress.Compressors.LZMA.RangeCoder;

internal struct BitEncoder
Expand Down Expand Up @@ -100,6 +104,7 @@ public void UpdateModel(int numMoveBits, uint symbol)

public void Init() => _prob = K_BIT_MODEL_TOTAL >> 1;

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint Decode(Decoder rangeDecoder)
{
var newBound = (rangeDecoder._range >> K_NUM_BIT_MODEL_TOTAL_BITS) * _prob;
Expand Down
64 changes: 64 additions & 0 deletions src/SharpCompress/IO/BufferedSubStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,70 @@ public override int Read(byte[] buffer, int offset, int count)
return count;
}

/// <summary>
/// Fast skip operation that minimizes cache refills and uses larger reads.
/// Used by StreamExtensions.Skip to efficiently skip large amounts of data.
/// For non-seekable streams like LZMA, we must still read (decompress) the data,
/// but we can do it in very large chunks to minimize overhead.
/// </summary>
/// <summary>
/// Fast skip operation that minimizes cache refills and uses large reads.
/// Used by StreamExtensions.Skip to efficiently skip data. For non-seekable
/// sources (e.g. LZMA) the data must still be read (decompressed), but doing
/// so in large chunks minimizes per-call overhead.
/// </summary>
/// <param name="advanceAmount">Number of bytes to skip; no-op if &lt;= 0.</param>
internal void SkipInternal(long advanceAmount)
{
    if (advanceAmount <= 0)
    {
        return;
    }

    // First, consume whatever is already buffered in the cache (no I/O needed).
    var inCache = _cacheLength - _cacheOffset;
    if (inCache > 0)
    {
        var skipFromCache = (int)Math.Min(advanceAmount, inCache);
        _cacheOffset += skipFromCache;
        advanceAmount -= skipFromCache;
    }

    // Nothing further to do if the cache covered the whole skip, or if the
    // sub-stream has no bytes left to read. In the latter case the cache is
    // already fully consumed (skipFromCache == inCache above), so no cache
    // reset is required — avoid renting a pooled buffer for nothing.
    if (advanceAmount == 0 || BytesLeftToRead <= 0)
    {
        return;
    }

    // For the remaining data we must actually read it from the underlying
    // stream. Rent only as much buffer as we can actually use, capped at 1MB,
    // so small skips do not pull a large array from the pool.
    var rentSize = (int)Math.Min(Math.Min(advanceAmount, BytesLeftToRead), 1048576);
    var skipBuffer = ArrayPool<byte>.Shared.Rent(rentSize);
    try
    {
        while (advanceAmount > 0 && BytesLeftToRead > 0)
        {
            var toRead = (int)
                Math.Min(Math.Min(advanceAmount, BytesLeftToRead), skipBuffer.Length);

            // Only seek if we're not already at the correct position.
            if (Stream.CanSeek && Stream.Position != origin)
            {
                Stream.Position = origin;
            }

            var read = Stream.Read(skipBuffer, 0, toRead);
            if (read == 0)
            {
                // Underlying stream exhausted early; stop rather than spin.
                break;
            }

            origin += read;
            BytesLeftToRead -= read;
            advanceAmount -= read;
        }

        // Invalidate cache since we read past whatever it held.
        _cacheOffset = 0;
        _cacheLength = 0;
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(skipBuffer);
    }
}

public override int ReadByte()
{
if (_cacheOffset == _cacheLength)
Expand Down
Loading
Loading