Skip to content

Commit 8e97428

Browse files
martin-kokos authored and adriangb committed
Cache: Add logic and tests for FileCache handling corrupt files (python-poetry#7453)
1 parent ed87de4 commit 8e97428

2 files changed

Lines changed: 69 additions & 1 deletion

File tree

src/poetry/utils/cache.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import dataclasses
55
import hashlib
66
import json
7+
import logging
78
import shutil
89
import time
910

@@ -28,6 +29,8 @@
2829
MAX_DATE = 9999999999
2930
T = TypeVar("T")
3031

32+
logger = logging.getLogger(__name__)
33+
3134

3235
def decode(string: bytes, encodings: list[str] | None = None) -> str:
3336
"""
@@ -182,7 +185,14 @@ def _get_payload(self, key: str) -> T | None:
182185
return None
183186

184187
with open(path, "rb") as f:
185-
payload = self._deserialize(f.read())
188+
file_content = f.read()
189+
190+
try:
191+
payload = self._deserialize(file_content)
192+
except (json.JSONDecodeError, ValueError):
193+
self.forget(key)
194+
logger.warning("Corrupt cache file was detected and cleaned up.")
195+
return None
186196

187197
if payload.expired:
188198
self.forget(key)

tests/utils/test_cache.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
import shutil
4+
35
from pathlib import Path
46
from typing import TYPE_CHECKING
57
from typing import Any
@@ -198,6 +200,62 @@ def test_cachy_compatibility(
198200
assert cachy_file_cache.get("key4") == test_obj
199201

200202

203+
def test_missing_cache_file(poetry_file_cache: FileCache) -> None:
204+
poetry_file_cache.put("key1", "value")
205+
206+
key1_path = (
207+
poetry_file_cache.path
208+
/ "81/74/09/96/87/a2/66/21/8174099687a26621f4e2cdd7cc03b3dacedb3fb962255b1aafd033cabe831530" # noqa: E501
209+
)
210+
assert key1_path.exists()
211+
key1_path.unlink() # corrupt cache by removing a key file
212+
213+
assert poetry_file_cache.get("key1") is None
214+
215+
216+
def test_missing_cache_path(poetry_file_cache: FileCache) -> None:
217+
poetry_file_cache.put("key1", "value")
218+
219+
key1_partial_path = poetry_file_cache.path / "81/74/09/96/87/a2/"
220+
assert key1_partial_path.exists()
221+
shutil.rmtree(
222+
key1_partial_path
223+
) # corrupt cache by removing a subdirectory containing a key file
224+
225+
assert poetry_file_cache.get("key1") is None
226+
227+
228+
@pytest.mark.parametrize(
229+
"corrupt_payload",
230+
[
231+
"", # empty file
232+
b"\x00", # null
233+
"99999999", # truncated file
234+
'999999a999"value"', # corrupt lifetime
235+
b'9999999999"va\xd8\x00"', # invalid unicode
236+
"fil3systemFa!led", # garbage file
237+
],
238+
)
239+
def test_detect_corrupted_cache_key_file(
240+
corrupt_payload: str | bytes, poetry_file_cache: FileCache
241+
) -> None:
242+
poetry_file_cache.put("key1", "value")
243+
244+
key1_path = (
245+
poetry_file_cache.path
246+
/ "81/74/09/96/87/a2/66/21/8174099687a26621f4e2cdd7cc03b3dacedb3fb962255b1aafd033cabe831530" # noqa: E501
247+
)
248+
assert key1_path.exists()
249+
250+
# original content: 9999999999"value"
251+
252+
write_modes = {str: "w", bytes: "wb"}
253+
with open(key1_path, write_modes[type(corrupt_payload)]) as f:
254+
f.write(corrupt_payload) # write corrupt data
255+
256+
assert poetry_file_cache.get("key1") is None
257+
258+
201259
def test_get_cache_directory_for_link(tmp_path: Path) -> None:
202260
cache = ArtifactCache(cache_dir=tmp_path)
203261
directory = cache.get_cache_directory_for_link(

0 commit comments

Comments
 (0)