1010 IntegrationTestLogs ,
1111 IntegrationTestPathConfig ,
1212)
13- from tests .utils .utils import unlink
1413
1514logger = logging .getLogger (__name__ )
1615
@@ -48,7 +47,18 @@ def _download_and_extract_dataset(
4847 integration_test_path_config : IntegrationTestPathConfig ,
4948 name : str ,
5049 tarball_url : str ,
50+ has_leading_directory_component : bool = True ,
5151) -> IntegrationTestLogs :
52+ """
53+ :param request:
54+ :param integration_test_path_config:
55+ :param name:
56+ :param tarball_url:
57+ :param has_leading_directory_component: Whether all files inside the tarball are stored under a
58+ single top level directory. Defaults to True.
59+ :return: The IntegrationTestLogs object with its associated logs properly downloaded,
60+ extracted, and permissions changed to be overwritable.
61+ """
5262 integration_test_logs = IntegrationTestLogs (
5363 name = name ,
5464 tarball_url = tarball_url ,
@@ -58,38 +68,56 @@ def _download_and_extract_dataset(
5868 logger .info ("Test logs `%s` are up-to-date. Skipping download." , name )
5969 return integration_test_logs
6070
71+ integration_test_logs .tarball_path .unlink (missing_ok = True )
72+ shutil .rmtree (integration_test_logs .extraction_dir )
73+ integration_test_logs .extraction_dir .mkdir (parents = True , exist_ok = False )
74+
75+ tarball_path_str = str (integration_test_logs .tarball_path )
76+ extract_path_str = str (integration_test_logs .extraction_dir )
77+
6178 curl_bin = shutil .which ("curl" )
6279 if curl_bin is None :
6380 err_msg = "curl executable not found"
6481 raise RuntimeError (err_msg )
6582
66- try :
67- # fmt: off
68- curl_cmds = [
69- curl_bin ,
70- "--fail" ,
71- "--location" ,
72- "--output" , str (integration_test_logs .tarball_path ),
73- "--show-error" ,
74- tarball_url ,
75- ]
76- # fmt: on
77- subprocess .run (curl_cmds , check = True )
78-
79- unlink (integration_test_logs .extraction_dir )
80- shutil .unpack_archive (
81- integration_test_logs .tarball_path , integration_test_logs .extraction_dir
82- )
83- except Exception as e :
84- err_msg = f"Failed to download and extract dataset `{ name } `."
85- raise RuntimeError (err_msg ) from e
86-
87- # Allow the extracted content to be deletable or overwritable
83+ # fmt: off
84+ curl_cmd = [
85+ curl_bin ,
86+ "--fail" ,
87+ "--location" ,
88+ "--output" , tarball_path_str ,
89+ "--show-error" ,
90+ tarball_url ,
91+ ]
92+ # fmt: on
93+ subprocess .run (curl_cmd , check = True )
94+
95+ tar_bin = shutil .which ("tar" )
96+ if tar_bin is None :
97+ err_msg = "tar executable not found"
98+ raise RuntimeError (err_msg )
99+
100+ # fmt: off
101+ extract_cmd = [
102+ tar_bin ,
103+ "--extract" ,
104+ "--gzip" ,
105+ "--file" , tarball_path_str ,
106+ "-C" , extract_path_str ,
107+ ]
108+ # fmt: on
109+ if has_leading_directory_component :
110+ extract_cmd .extend (["--strip-components" , "1" ])
111+ subprocess .run (extract_cmd , check = True )
112+
88113 chmod_bin = shutil .which ("chmod" )
89114 if chmod_bin is None :
90115 err_msg = "chmod executable not found"
91116 raise RuntimeError (err_msg )
92- subprocess .run ([chmod_bin , "-R" , "gu+w" , integration_test_logs .extraction_dir ], check = True )
117+
118+ # Allow the downloaded and extracted contents to be deletable or overwritable
119+ subprocess .run ([chmod_bin , "-R" , "gu+w" , tarball_path_str ], check = True )
120+ subprocess .run ([chmod_bin , "-R" , "gu+w" , extract_path_str ], check = True )
93121
94122 logger .info ("Downloaded and extracted uncompressed logs for dataset `%s`." , name )
95123 request .config .cache .set (name , True )
0 commit comments