From a8ea47f48c601505f2281fa204dfada39d7866fc Mon Sep 17 00:00:00 2001
From: TrellixVulnTeam
Date: Mon, 12 Dec 2022 08:14:07 +0000
Subject: [PATCH] Adding tarfile member sanitization to extractall()

---
 .../importer/musicbrainz_dump_extractor.py    | 21 ++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/soweego/importer/musicbrainz_dump_extractor.py b/soweego/importer/musicbrainz_dump_extractor.py
index 269dd34c..ea0b242d 100644
--- a/soweego/importer/musicbrainz_dump_extractor.py
+++ b/soweego/importer/musicbrainz_dump_extractor.py
@@ -70,7 +70,26 @@ def extract_and_populate(self, dump_file_paths: List[str], resolve: bool):
         if not os.path.isdir(dump_path):
             with tarfile.open(dump_file_path, "r:bz2") as tar:
                 LOGGER.info("Extracting dump %s in %s", dump_file_path, dump_path)
-                tar.extractall(dump_path)
+                def is_within_directory(directory, target):
+                    # os.path.commonprefix() compares character by character, so
+                    # a sibling such as "<directory>_evil" would be accepted as
+                    # inside "<directory>"; commonpath() compares whole components.
+                    abs_directory = os.path.abspath(directory)
+                    abs_target = os.path.abspath(target)
+
+                    common = os.path.commonpath([abs_directory, abs_target])
+                    return common == abs_directory
+
+                def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                    # Check every member before anything is written to disk.
+                    for member in tar.getmembers():
+                        member_path = os.path.join(path, member.name)
+                        if not is_within_directory(path, member_path):
+                            raise Exception("Attempted Path Traversal in Tar File")
+
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
+                safe_extract(tar, dump_path)
                 LOGGER.info("Extracted dump %s in %s", dump_file_path, dump_path)
 
         db_manager = DBManager()