From 6fecf8bb4f05b0a16d3313be012d49acf149ac05 Mon Sep 17 00:00:00 2001 From: Nicholas Vinson Date: Thu, 20 Mar 2025 21:59:33 -0400 Subject: [PATCH 1/5] Add bindings to change symlink read behavior. Add bindings to change symlink read behavior. Unfortunately, libarchive does not seem to have a way of getting the current setting, so only the setter was implemented. Closes: #132 Signed-off-by: Nicholas Vinson --- libarchive/ffi.py | 6 ++++++ libarchive/write.py | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/libarchive/ffi.py b/libarchive/ffi.py index 1fc321a..850e4c4 100644 --- a/libarchive/ffi.py +++ b/libarchive/ffi.py @@ -35,6 +35,9 @@ ARCHIVE_FAILED = -25 # Current operation cannot complete. ARCHIVE_FATAL = -30 # No more operations are possible. +ARCHIVE_SYMLINK_MODE_HYBRID = 'H' +ARCHIVE_SYMLINK_MODE_LOGICAL = 'L' +ARCHIVE_SYMLINK_MODE_PHYSICAL = 'P' # Callback types @@ -286,6 +289,9 @@ def get_write_filter_function(filter_name): ffi('read_disk_open', [c_archive_p, c_char_p], c_int, check_int) ffi('read_disk_open_w', [c_archive_p, c_wchar_p], c_int, check_int) ffi('read_disk_descend', [c_archive_p], c_int, check_int) +ffi('read_disk_set_symlink_hybrid', [c_archive_p], c_int) +ffi('read_disk_set_symlink_logical', [c_archive_p], c_int) +ffi('read_disk_set_symlink_physical', [c_archive_p], c_int) # archive_read_data diff --git a/libarchive/write.py b/libarchive/write.py index 3b6caba..a058e53 100644 --- a/libarchive/write.py +++ b/libarchive/write.py @@ -46,7 +46,7 @@ def add_entries(self, entries): def add_files( self, *paths, flags=0, lookup=False, pathname=None, recursive=True, - **attributes + symlink_mode=ffi.ARCHIVE_SYMLINK_MODE_PHYSICAL, **attributes ): """Read files through the OS and add them to the archive. @@ -63,6 +63,11 @@ def add_files( recursive (bool): when False, if a path in `paths` is a directory, only the directory itself is added. + symlink_mode (enum): + Determines how symlinks are traversed. Valid options are + ARCHIVE_SYMLINK_MODE_HYBRID, ARCHIVE_SYMLINK_MODE_LOGICAL, and + ARCHIVE_SYMLINK_MODE_PHYSICAL as defined in the ffi module. + Default value matches default from libarchive. attributes (dict): passed to `ArchiveEntry.modify()` Raises: @@ -79,6 +84,14 @@ def add_files( entry_p = entry._entry_p for path in paths: with new_archive_read_disk(path, flags, lookup) as read_p: + if (symlink_mode == ffi.ARCHIVE_SYMLINK_MODE_PHYSICAL): + ffi.read_disk_set_symlink_physical(read_p) + elif (symlink_mode == ffi.ARCHIVE_SYMLINK_MODE_LOGICAL): + ffi.read_disk_set_symlink_logical(read_p) + elif (symlink_mode == ffi.ARCHIVE_SYMLINK_MODE_HYBRID): + ffi.read_disk_set_symlink_hybrid(read_p) + else: + raise ValueError(f"Bad symlink mode value {symlink_mode}") while 1: r = read_next_header2(read_p, entry_p) if r == ARCHIVE_EOF: From 88520c3a5dc0908ba13f928cec0122ac2bfb7518 Mon Sep 17 00:00:00 2001 From: Changaco Date: Sun, 13 Apr 2025 11:56:44 +0200 Subject: [PATCH 2/5] refactor --- libarchive/ffi.py | 3 --- libarchive/write.py | 31 +++++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/libarchive/ffi.py b/libarchive/ffi.py index 850e4c4..7dc38cf 100644 --- a/libarchive/ffi.py +++ b/libarchive/ffi.py @@ -35,9 +35,6 @@ ARCHIVE_FAILED = -25 # Current operation cannot complete. ARCHIVE_FATAL = -30 # No more operations are possible. -ARCHIVE_SYMLINK_MODE_HYBRID = 'H' -ARCHIVE_SYMLINK_MODE_LOGICAL = 'L' -ARCHIVE_SYMLINK_MODE_PHYSICAL = 'P' # Callback types diff --git a/libarchive/write.py b/libarchive/write.py index a058e53..782e0f7 100644 --- a/libarchive/write.py +++ b/libarchive/write.py @@ -46,7 +46,7 @@ def add_entries(self, entries): def add_files( self, *paths, flags=0, lookup=False, pathname=None, recursive=True, - symlink_mode=ffi.ARCHIVE_SYMLINK_MODE_PHYSICAL, **attributes + symlink_mode=None, **attributes ): """Read files through the OS and add them to the archive. @@ -63,11 +63,9 @@ def add_files( recursive (bool): when False, if a path in `paths` is a directory, only the directory itself is added. - symlink_mode (enum): - Determines how symlinks are traversed. Valid options are - ARCHIVE_SYMLINK_MODE_HYBRID, ARCHIVE_SYMLINK_MODE_LOGICAL, and - ARCHIVE_SYMLINK_MODE_PHYSICAL as defined in the ffi module. - Default value matches default from libarchive. + symlink_mode (Literal['hybrid', 'logical', 'physical'] | None): + how symbolic links should be handled; see `man archive_read_disk` + for meanings attributes (dict): passed to `ArchiveEntry.modify()` Raises: @@ -80,18 +78,23 @@ def add_files( if block_size <= 0: block_size = 10240 # pragma: no cover + set_symlink_mode = None + if symlink_mode: + try: + set_symlink_mode = getattr( + ffi, f'read_disk_set_symlink_{symlink_mode}' + ) + except AttributeError: + raise ValueError( + f"symlink_mode value {symlink_mode!r} is invalid" + ) from None + entry = ArchiveEntry(header_codec=self.header_codec) entry_p = entry._entry_p for path in paths: with new_archive_read_disk(path, flags, lookup) as read_p: - if (symlink_mode == ffi.ARCHIVE_SYMLINK_MODE_PHYSICAL): - ffi.read_disk_set_symlink_physical(read_p) - elif (symlink_mode == ffi.ARCHIVE_SYMLINK_MODE_LOGICAL): - ffi.read_disk_set_symlink_logical(read_p) - elif (symlink_mode == ffi.ARCHIVE_SYMLINK_MODE_HYBRID): - ffi.read_disk_set_symlink_hybrid(read_p) - else: - raise ValueError(f"Bad symlink mode value {symlink_mode}") + if set_symlink_mode: + set_symlink_mode(read_p) while 1: r = read_next_header2(read_p, entry_p) if r == ARCHIVE_EOF: From f170918e2412510af99bbf23519adf9366eeb733 Mon Sep 17 00:00:00 2001 From: Changaco Date: Sun, 13 Apr 2025 12:00:34 +0200 Subject: [PATCH 3/5] add missing error checks --- libarchive/ffi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libarchive/ffi.py b/libarchive/ffi.py index 7dc38cf..172fe87 100644 --- a/libarchive/ffi.py +++ b/libarchive/ffi.py @@ -286,9 +286,9 @@ def get_write_filter_function(filter_name): ffi('read_disk_open', [c_archive_p, c_char_p], c_int, check_int) ffi('read_disk_open_w', [c_archive_p, c_wchar_p], c_int, check_int) ffi('read_disk_descend', [c_archive_p], c_int, check_int) -ffi('read_disk_set_symlink_hybrid', [c_archive_p], c_int) -ffi('read_disk_set_symlink_logical', [c_archive_p], c_int) -ffi('read_disk_set_symlink_physical', [c_archive_p], c_int) +ffi('read_disk_set_symlink_hybrid', [c_archive_p], c_int, check_int) +ffi('read_disk_set_symlink_logical', [c_archive_p], c_int, check_int) +ffi('read_disk_set_symlink_physical', [c_archive_p], c_int, check_int) # archive_read_data From 232da89e5b423fd8933616c514d89e580c21ff76 Mon Sep 17 00:00:00 2001 From: Changaco Date: Mon, 14 Apr 2025 14:10:32 +0200 Subject: [PATCH 4/5] add test of `symlink_mode` argument --- tests/test_rwx.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_rwx.py b/tests/test_rwx.py index 77c16fc..bac04eb 100644 --- a/tests/test_rwx.py +++ b/tests/test_rwx.py @@ -2,6 +2,7 @@ import io import json +import os import libarchive from libarchive.entry import format_time @@ -181,3 +182,32 @@ def write_callback(data): ) assert archive_entry.uid == 1000 assert archive_entry.gid == 1000 + + +def test_symlinks(tmpdir): + os.chdir(tmpdir) + with open('empty', 'w'): + pass + with open('unreadable', 'w') as f: + f.write('secret') + os.chmod('unreadable', 0) + + os.symlink('empty', 'symlink-to-empty') + os.symlink('unreadable', 'symlink-to-unreadable') + + with libarchive.file_writer('archive.tar', 'gnutar') as archive: + archive.add_files('symlink-to-empty', symlink_mode='hybrid') + with pytest.raises(libarchive.ArchiveError): + archive.add_files('symlink-to-unreadable', symlink_mode='logical') + archive.add_files('symlink-to-unreadable', symlink_mode='physical') + + with libarchive.file_reader('archive.tar') as archive: + entries = iter(archive) + e1 = next(entries) + assert e1.pathname == 'symlink-to-empty' + assert e1.isreg + assert e1.size == 0 + e2 = next(entries) + assert e2.pathname == 'symlink-to-unreadable' + assert e2.issym + assert e2.linkpath == 'unreadable' From b313c1d4665d800a2fe6644db6bd22e011d141a7 Mon Sep 17 00:00:00 2001 From: Changaco Date: Sat, 19 Apr 2025 09:30:32 +0200 Subject: [PATCH 5/5] document the new `symlink_mode` argument --- README.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.rst b/README.rst index 2a21841..43f402c 100644 --- a/README.rst +++ b/README.rst @@ -114,6 +114,14 @@ and the optional third argument is the compression format (called “filter” i libarchive). The acceptable values are listed in ``libarchive.ffi.WRITE_FORMATS`` and ``libarchive.ffi.WRITE_FILTERS``. +Symbolic links +~~~~~~~~~~~~~~ + +By default, libarchive preserves symbolic links. If you want it to resolve the +links and archive the files they point to instead, pass ``symlink_mode='logical'`` +when calling the ``add_files`` method. If you do that, an ``ArchiveError`` +exception will be raised when a symbolic link points to a nonexistent file. + File metadata codecs --------------------