Skip to content

Commit 3ad919f

Browse files
committed
Tar extract switch from shutil to tar and use subprocess
1 parent 21b8da6 commit 3ad919f

File tree

1 file changed

+52
-24
lines changed

1 file changed

+52
-24
lines changed

integration-tests/tests/fixtures/integration_test_logs.py

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
IntegrationTestLogs,
1111
IntegrationTestPathConfig,
1212
)
13-
from tests.utils.utils import unlink
1413

1514
logger = logging.getLogger(__name__)
1615

@@ -48,7 +47,18 @@ def _download_and_extract_dataset(
4847
integration_test_path_config: IntegrationTestPathConfig,
4948
name: str,
5049
tarball_url: str,
50+
has_leading_directory_component: bool = True,
5151
) -> IntegrationTestLogs:
52+
"""
53+
:param request:
54+
:param integration_test_path_config:
55+
:param name:
56+
:param tarball_url:
57+
:param has_leading_directory_component: Whether all files inside the tarball are stored under a
58+
single top level directory. Defaults to True.
59+
:return: The IntegrationTestPathConfig object with its associated logs properly downloaded,
60+
extracted, and permission changed to be overritable.
61+
"""
5262
integration_test_logs = IntegrationTestLogs(
5363
name=name,
5464
tarball_url=tarball_url,
@@ -58,38 +68,56 @@ def _download_and_extract_dataset(
5868
logger.info("Test logs `%s` are up-to-date. Skipping download.", name)
5969
return integration_test_logs
6070

71+
integration_test_logs.tarball_path.unlink(missing_ok=True)
72+
shutil.rmtree(integration_test_logs.extraction_dir)
73+
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)
74+
75+
tarball_path_str = str(integration_test_logs.tarball_path)
76+
extract_path_str = str(integration_test_logs.extraction_dir)
77+
6178
curl_bin = shutil.which("curl")
6279
if curl_bin is None:
6380
err_msg = "curl executable not found"
6481
raise RuntimeError(err_msg)
6582

66-
try:
67-
# fmt: off
68-
curl_cmds = [
69-
curl_bin,
70-
"--fail",
71-
"--location",
72-
"--output", str(integration_test_logs.tarball_path),
73-
"--show-error",
74-
tarball_url,
75-
]
76-
# fmt: on
77-
subprocess.run(curl_cmds, check=True)
78-
79-
unlink(integration_test_logs.extraction_dir)
80-
shutil.unpack_archive(
81-
integration_test_logs.tarball_path, integration_test_logs.extraction_dir
82-
)
83-
except Exception as e:
84-
err_msg = f"Failed to download and extract dataset `{name}`."
85-
raise RuntimeError(err_msg) from e
86-
87-
# Allow the extracted content to be deletable or overwritable
83+
# fmt: off
84+
curl_cmd = [
85+
curl_bin,
86+
"--fail",
87+
"--location",
88+
"--output", tarball_path_str,
89+
"--show-error",
90+
tarball_url,
91+
]
92+
# fmt: on
93+
subprocess.run(curl_cmd, check=True)
94+
95+
tar_bin = shutil.which("tar")
96+
if tar_bin is None:
97+
err_msg = "tar executable not found"
98+
raise RuntimeError(err_msg)
99+
100+
# fmt: off
101+
extract_cmd = [
102+
tar_bin,
103+
"--extract",
104+
"--gzip",
105+
"--file", tarball_path_str,
106+
"-C", extract_path_str,
107+
]
108+
# fmt: on
109+
if has_leading_directory_component:
110+
extract_cmd.extend(["--strip-components", "1"])
111+
subprocess.run(extract_cmd, check=True)
112+
88113
chmod_bin = shutil.which("chmod")
89114
if chmod_bin is None:
90115
err_msg = "chmod executable not found"
91116
raise RuntimeError(err_msg)
92-
subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True)
117+
118+
# Allow the downloaded and extracted contents to be deletable or overwritable
119+
subprocess.run([chmod_bin, "-R", "gu+w", tarball_path_str], check=True)
120+
subprocess.run([chmod_bin, "-R", "gu+w", extract_path_str], check=True)
93121

94122
logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name)
95123
request.config.cache.set(name, True)

0 commit comments

Comments
 (0)