Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 131 additions & 2 deletions src/SharpCompress/Common/Tar/Headers/TarHeader.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.IO;
using System.Text;

Expand All @@ -9,8 +10,16 @@ internal sealed class TarHeader
{
internal static readonly DateTime EPOCH = new(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

public TarHeader(ArchiveEncoding archiveEncoding) => ArchiveEncoding = archiveEncoding;
/// <summary>
/// Creates a header that encodes names with <paramref name="archiveEncoding"/> and is
/// serialized in the layout selected by <paramref name="writeFormat"/>.
/// </summary>
/// <param name="archiveEncoding">Encoding settings stored in <c>ArchiveEncoding</c>.</param>
/// <param name="writeFormat">
/// Layout used by <c>Write</c>; defaults to <c>GNU_TAR_LONG_LINK</c>.
/// </param>
public TarHeader(
    ArchiveEncoding archiveEncoding,
    TarHeaderWriteFormat writeFormat = TarHeaderWriteFormat.GNU_TAR_LONG_LINK
) => (ArchiveEncoding, WriteFormat) = (archiveEncoding, writeFormat);

internal TarHeaderWriteFormat WriteFormat { get; set; }
internal string? Name { get; set; }
internal string? LinkName { get; set; }

Expand All @@ -30,6 +39,114 @@ internal sealed class TarHeader
private const int MAX_LONG_NAME_SIZE = 32768;

/// <summary>
/// Serializes this header to <paramref name="output"/> using the layout selected by
/// <c>WriteFormat</c>.
/// </summary>
/// <param name="output">Stream that receives the header bytes.</param>
/// <exception cref="InvalidOperationException">
/// <c>WriteFormat</c> holds a value that has no writer.
/// </exception>
internal void Write(Stream output)
{
    switch (WriteFormat)
    {
        case TarHeaderWriteFormat.GNU_TAR_LONG_LINK:
            WriteGnuTarLongLink(output);
            break;
        case TarHeaderWriteFormat.USTAR:
            WriteUstar(output);
            break;
        default:
            // Reachable only when an undefined enum value was cast into WriteFormat;
            // report which value it was instead of a generic Exception.
            throw new InvalidOperationException(
                $"Unsupported tar header write format: {WriteFormat}"
            );
    }
}

/// <summary>
/// Writes this entry to <paramref name="output"/> as one USTAR header block of
/// <c>BLOCK_SIZE</c> bytes.
/// </summary>
/// <remarks>
/// A name of at most 100 encoded bytes is stored in the name field (offset 0). A longer
/// name is split at a directory separator: the part before the separator goes into the
/// prefix field (offset 345, 155 bytes wide) and the part after it into the name field.
/// The mode is always written as 0777 and the owner and group IDs as 0.
/// </remarks>
/// <param name="output">Stream that receives the header block.</param>
/// <exception cref="InvalidOperationException">
/// The name is longer than 100 encoded bytes and cannot be split so that the prefix stays
/// below 155 bytes and the remainder below 100 bytes.
/// </exception>
internal void WriteUstar(Stream output)
{
    const int NameOffset = 0;
    const int NameFieldSize = 100;
    const int PrefixOffset = 345;
    const int PrefixFieldSize = 155;

    var buffer = new byte[BLOCK_SIZE];
    var encoding = ArchiveEncoding.GetEncoding();
    var fullName = Name.NotNull("Name is null");

    WriteOctalBytes(511, buffer, 100, 8); // file mode (511 decimal == 0777 octal)
    WriteOctalBytes(0, buffer, 108, 8); // owner ID
    WriteOctalBytes(0, buffer, 116, 8); // group ID

    var nameByteCount = encoding.GetByteCount(fullName);

    if (nameByteCount > NameFieldSize)
    {
        // Pick the last separator whose preceding text still fits the prefix field.
        // The prefix only grows as the scan advances, so stop at the first one that
        // no longer fits. '/' is accepted on every platform because tar paths use it;
        // Path.DirectorySeparatorChar additionally covers '\' on Windows.
        var splitIndex = -1;
        for (var i = 0; i < fullName.Length; i++)
        {
            var current = fullName[i];
            if (current != '/' && current != Path.DirectorySeparatorChar)
            {
                continue;
            }

            if (encoding.GetByteCount(fullName.Substring(0, i)) >= PrefixFieldSize)
            {
                break;
            }

            splitIndex = i;
        }

        if (splitIndex == -1)
        {
            throw new InvalidOperationException(
                $"Tar header USTAR format can not fit file name \"{fullName}\" of length {nameByteCount}! Directory separator not found! Try using GNU Tar format instead!"
            );
        }

        // The separator itself is dropped; it is not stored in either field.
        var namePrefix = fullName.Substring(0, splitIndex);
        var name = fullName.Substring(splitIndex + 1);

        // Kept strictly below the field width, as before, so the zero padding leaves
        // at least one trailing NUL in the name field.
        if (encoding.GetByteCount(name) >= NameFieldSize)
        {
            throw new InvalidOperationException(
                $"Tar header USTAR format can not fit file name \"{fullName}\" of length {nameByteCount}! Try using GNU Tar format instead!"
            );
        }

        // The prefix field is 155 bytes wide; padding it as a 100-byte field would
        // leave its tail unmanaged.
        WriteStringBytes(
            ArchiveEncoding.Encode(namePrefix),
            buffer,
            PrefixOffset,
            PrefixFieldSize
        );
        WriteStringBytes(ArchiveEncoding.Encode(name), buffer, NameOffset, NameFieldSize);
    }
    else
    {
        WriteStringBytes(
            ArchiveEncoding.Encode(fullName),
            buffer,
            NameOffset,
            NameFieldSize
        );
    }

    WriteOctalBytes(Size, buffer, 124, 12);
    var time = (long)(LastModifiedTime.ToUniversalTime() - EPOCH).TotalSeconds;
    WriteOctalBytes(time, buffer, 136, 12);
    buffer[156] = (byte)EntryType;

    // "ustar" plus a NUL terminator fills the 6-byte magic field.
    WriteStringBytes(Encoding.ASCII.GetBytes("ustar"), buffer, 257, 6);
    // Version field: the two ASCII characters "00".
    buffer[263] = 0x30;
    buffer[264] = 0x30;

    // The checksum is computed last so it covers every field written above.
    var crc = RecalculateChecksum(buffer);
    WriteOctalBytes(crc, buffer, 148, 8);

    output.Write(buffer, 0, buffer.Length);
}

internal void WriteGnuTarLongLink(Stream output)
{
var buffer = new byte[BLOCK_SIZE];

Expand Down Expand Up @@ -85,7 +202,7 @@ internal void Write(Stream output)
0,
100 - ArchiveEncoding.GetEncoding().GetMaxByteCount(1)
);
Write(output);
WriteGnuTarLongLink(output);
}
}

Expand Down Expand Up @@ -241,6 +358,18 @@ private static void WriteStringBytes(ReadOnlySpan<byte> name, Span<byte> buffer,
buffer.Slice(i, length - i).Clear();
}

/// <summary>
/// Stores <paramref name="name"/> in the fixed-width field that starts at
/// <paramref name="offset"/> in <paramref name="buffer"/> and is
/// <paramref name="length"/> bytes wide.
/// </summary>
/// <remarks>
/// Bytes beyond <paramref name="length"/> are dropped so the write never spills into the
/// following field; unused bytes of the field are set to zero.
/// </remarks>
/// <param name="name">Encoded bytes to store.</param>
/// <param name="buffer">Destination header buffer.</param>
/// <param name="offset">Index in <paramref name="buffer"/> where the field starts.</param>
/// <param name="length">Width of the field in bytes.</param>
private static void WriteStringBytes(
    ReadOnlySpan<byte> name,
    Span<byte> buffer,
    int offset,
    int length
)
{
    // Slicing the field first confines every write below to [offset, offset + length).
    var field = buffer.Slice(offset, length);
    var count = Math.Min(length, name.Length);
    name.Slice(0, count).CopyTo(field);
    field.Slice(count).Clear();
}

private static void WriteStringBytes(string name, byte[] buffer, int offset, int length)
{
int i;
Expand Down
7 changes: 7 additions & 0 deletions src/SharpCompress/Common/Tar/Headers/TarHeaderWriteFormat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace SharpCompress.Common.Tar.Headers;

/// <summary>
/// Selects the header layout used when tar entries are written.
/// </summary>
public enum TarHeaderWriteFormat
{
    /// <summary>
    /// GNU tar layout. This is the default for both <c>TarHeader</c> and
    /// <c>TarWriterOptions</c>.
    /// </summary>
    GNU_TAR_LONG_LINK,

    /// <summary>
    /// USTAR layout. A name longer than 100 encoded bytes is split at a directory
    /// separator between the 155-byte prefix field and the 100-byte name field; writing
    /// fails when no suitable split exists.
    /// </summary>
    USTAR,
}
4 changes: 3 additions & 1 deletion src/SharpCompress/Writers/Tar/TarWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ namespace SharpCompress.Writers.Tar;
public class TarWriter : AbstractWriter
{
private readonly bool finalizeArchiveOnClose;
private TarHeaderWriteFormat headerFormat;
Copy link

Copilot AI Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The field headerFormat should be readonly since it's only assigned in the constructor and never modified afterward. This makes the code's intent clearer and prevents accidental modification.

Suggested change
private TarHeaderWriteFormat headerFormat;
private readonly TarHeaderWriteFormat headerFormat;

Copilot uses AI. Check for mistakes.

public TarWriter(Stream destination, TarWriterOptions options)
: base(ArchiveType.Tar, options)
{
finalizeArchiveOnClose = options.FinalizeArchiveOnClose;
headerFormat = options.HeaderFormat;

if (!destination.CanWrite)
{
Expand Down Expand Up @@ -121,7 +123,7 @@ public void Write(string filename, Stream source, DateTime? modificationTime, lo

var realSize = size ?? source.Length;

var header = new TarHeader(WriterOptions.ArchiveEncoding);
var header = new TarHeader(WriterOptions.ArchiveEncoding, headerFormat);

header.LastModifiedTime = modificationTime ?? TarHeader.EPOCH;
header.Name = NormalizeFilename(filename);
Expand Down
15 changes: 13 additions & 2 deletions src/SharpCompress/Writers/Tar/TarWriterOptions.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using SharpCompress.Common;
using SharpCompress.Common.Tar.Headers;

namespace SharpCompress.Writers.Tar;

Expand All @@ -9,8 +10,18 @@ public class TarWriterOptions : WriterOptions
/// </summary>
public bool FinalizeArchiveOnClose { get; }

Copy link

Copilot AI Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new HeaderFormat property lacks XML documentation. Consider adding a summary that explains its purpose and default value. Example:

/// <summary>
/// Gets the TAR header format to use when writing the archive.
/// Defaults to GNU_TAR_LONG_LINK for maximum filename length support.
/// </summary>
public TarHeaderWriteFormat HeaderFormat { get; }
Suggested change
/// <summary>
/// Gets the TAR header format to use when writing the archive.
/// Defaults to GNU_TAR_LONG_LINK for maximum filename length support.
/// </summary>

Copilot uses AI. Check for mistakes.
public TarWriterOptions(CompressionType compressionType, bool finalizeArchiveOnClose)
: base(compressionType) => FinalizeArchiveOnClose = finalizeArchiveOnClose;
public TarHeaderWriteFormat HeaderFormat { get; }

/// <summary>
/// Creates tar writer options.
/// </summary>
/// <param name="compressionType">Compression type passed to the base options.</param>
/// <param name="finalizeArchiveOnClose">Value stored in <c>FinalizeArchiveOnClose</c>.</param>
/// <param name="headerFormat">
/// Value stored in <c>HeaderFormat</c>; defaults to <c>GNU_TAR_LONG_LINK</c>.
/// </param>
public TarWriterOptions(
    CompressionType compressionType,
    bool finalizeArchiveOnClose,
    TarHeaderWriteFormat headerFormat = TarHeaderWriteFormat.GNU_TAR_LONG_LINK
)
    : base(compressionType) =>
    (FinalizeArchiveOnClose, HeaderFormat) = (finalizeArchiveOnClose, headerFormat);

/// <summary>
/// Builds tar options from generic writer options, copying the compression type and
/// archive encoding. The archive is finalized on close and the header format is left
/// at its default.
/// </summary>
/// <param name="options">Generic options to copy from.</param>
internal TarWriterOptions(WriterOptions options)
    : this(options.CompressionType, finalizeArchiveOnClose: true)
{
    ArchiveEncoding = options.ArchiveEncoding;
}
Comment on lines 26 to 27
Copy link

Copilot AI Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The internal constructor doesn't pass the headerFormat parameter, which means it will always use the default GNU_TAR_LONG_LINK format. This could lead to unexpected behavior when creating a TarWriter from generic WriterOptions. Consider adding a parameter or explicitly setting the header format in this constructor.

Suggested change
internal TarWriterOptions(WriterOptions options)
: this(options.CompressionType, true) => ArchiveEncoding = options.ArchiveEncoding;
internal TarWriterOptions(WriterOptions options, TarHeaderWriteFormat headerFormat = TarHeaderWriteFormat.GNU_TAR_LONG_LINK)
: this(options.CompressionType, true, headerFormat) => ArchiveEncoding = options.ArchiveEncoding;

Copilot uses AI. Check for mistakes.
Expand Down