Skip to content

Commit 3ef0705

Browse files
committed
PEP-710: implement provenance_url.json file
Signed-off-by: Fridolin Pokorny <fridolin.pokorny@gmail.com>
1 parent 59a9d2c commit 3ef0705

7 files changed

Lines changed: 322 additions & 17 deletions

File tree

news/11865.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implement PEP 710 by storing a provenance_url.json file.

src/pip/_internal/models/direct_url.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
T = TypeVar("T")
1818

1919
DIRECT_URL_METADATA_NAME = "direct_url.json"
20+
PROVENANCE_URL_METADATA_NAME = "provenance_url.json"
2021
ENV_VAR_RE = re.compile(r"^\$\{[A-Za-z0-9-_]+\}(:\$\{[A-Za-z0-9-_]+\})?$")
2122

2223

@@ -205,20 +206,27 @@ def from_dict(cls, d: Dict[str, Any]) -> "DirectUrl":
205206
),
206207
)
207208

208-
def to_dict(self) -> Dict[str, Any]:
209+
def to_dict(self, *, keep_legacy_hash_key: bool = True) -> Dict[str, Any]:
209210
res = _filter_none(
210211
url=self.redacted_url,
211212
subdirectory=self.subdirectory,
212213
)
213-
res[self.info.name] = self.info._to_dict()
214+
215+
info_dict = self.info._to_dict()
216+
if not keep_legacy_hash_key:
217+
info_dict.pop("hash", None)
218+
219+
res[self.info.name] = info_dict
214220
return res
215221

216222
@classmethod
217223
def from_json(cls, s: str) -> "DirectUrl":
218224
return cls.from_dict(json.loads(s))
219225

220-
def to_json(self) -> str:
221-
return json.dumps(self.to_dict(), sort_keys=True)
226+
def to_json(self, *, keep_legacy_hash_key: bool = True) -> str:
227+
return json.dumps(
228+
self.to_dict(keep_legacy_hash_key=keep_legacy_hash_key), sort_keys=True
229+
)
222230

223231
def is_local_editable(self) -> bool:
224232
return isinstance(self.info, DirInfo) and self.info.editable

src/pip/_internal/operations/install/wheel.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,12 @@
4848
FilesystemWheel,
4949
get_wheel_distribution,
5050
)
51-
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, DirectUrl
51+
from pip._internal.models.direct_url import (
52+
DIRECT_URL_METADATA_NAME,
53+
PROVENANCE_URL_METADATA_NAME,
54+
ArchiveInfo,
55+
DirectUrl,
56+
)
5257
from pip._internal.models.scheme import SCHEME_KEYS, Scheme
5358
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
5459
from pip._internal.utils.misc import StreamWrapper, ensure_dir, hash_file, partition
@@ -424,9 +429,10 @@ def _install_wheel( # noqa: C901, PLR0915 function is too long
424429
wheel_zip: ZipFile,
425430
wheel_path: str,
426431
scheme: Scheme,
432+
download_info: DirectUrl,
433+
is_direct: bool,
427434
pycompile: bool = True,
428435
warn_script_location: bool = True,
429-
direct_url: Optional[DirectUrl] = None,
430436
requested: bool = False,
431437
) -> None:
432438
"""Install a wheel.
@@ -673,12 +679,25 @@ def _generate_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]:
673679
installer_file.write(b"pip\n")
674680
generated.append(installer_path)
675681

676-
# Record the PEP 610 direct URL reference
677-
if direct_url is not None:
682+
if is_direct:
683+
# Record the PEP 610 direct URL reference
678684
direct_url_path = os.path.join(dest_info_dir, DIRECT_URL_METADATA_NAME)
679685
with _generate_file(direct_url_path) as direct_url_file:
680-
direct_url_file.write(direct_url.to_json().encode("utf-8"))
686+
direct_url_file.write(download_info.to_json().encode("utf-8"))
681687
generated.append(direct_url_path)
688+
else:
689+
# Record the PEP 710 provenance URL reference only if we have hashes for
690+
# the given wheel. They can be missing when wheels are built using an old pip.
691+
assert isinstance(download_info.info, ArchiveInfo)
692+
if download_info.info.hashes:
693+
provenance_url_path = os.path.join(
694+
dest_info_dir, PROVENANCE_URL_METADATA_NAME
695+
)
696+
with _generate_file(provenance_url_path) as provenance_url_file:
697+
provenance_url_file.write(
698+
download_info.to_json(keep_legacy_hash_key=False).encode("utf-8")
699+
)
700+
generated.append(provenance_url_path)
682701

683702
# Record the REQUESTED file
684703
if requested:
@@ -721,10 +740,11 @@ def install_wheel(
721740
name: str,
722741
wheel_path: str,
723742
scheme: Scheme,
743+
download_info: DirectUrl,
744+
is_direct: bool,
724745
req_description: str,
725746
pycompile: bool = True,
726747
warn_script_location: bool = True,
727-
direct_url: Optional[DirectUrl] = None,
728748
requested: bool = False,
729749
) -> None:
730750
with ZipFile(wheel_path, allowZip64=True) as z:
@@ -734,8 +754,9 @@ def install_wheel(
734754
wheel_zip=z,
735755
wheel_path=wheel_path,
736756
scheme=scheme,
757+
download_info=download_info,
758+
is_direct=is_direct,
737759
pycompile=pycompile,
738760
warn_script_location=warn_script_location,
739-
direct_url=direct_url,
740761
requested=requested,
741762
)

src/pip/_internal/req/req_install.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -861,17 +861,19 @@ def install(
861861
self.install_succeeded = True
862862
return
863863

864+
assert self.download_info
864865
assert self.is_wheel
865866
assert self.local_file_path
866867

867868
install_wheel(
868869
self.req.name,
869870
self.local_file_path,
870871
scheme=scheme,
872+
download_info=self.download_info,
873+
is_direct=self.is_direct,
871874
req_description=str(self.req),
872875
pycompile=pycompile,
873876
warn_script_location=warn_script_location,
874-
direct_url=self.download_info if self.is_direct else None,
875877
requested=self.user_supplied,
876878
)
877879
self.install_succeeded = True

tests/functional/test_install.py

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import hashlib
22
import io
3+
import json
34
import os
45
import re
56
import ssl
@@ -9,7 +10,7 @@
910
import textwrap
1011
from os.path import curdir, join, pardir
1112
from pathlib import Path
12-
from typing import Dict, Iterable, List, Optional, Tuple
13+
from typing import Any, Dict, Iterable, List, Optional, Tuple
1314

1415
import pytest
1516

@@ -2743,3 +2744,154 @@ def add_link(tar: tarfile.TarFile, name: str, linktype: str, target: str) -> Non
27432744
# Run the internal test
27442745
result = script.run("python", "-m", "linktest")
27452746
assert result.stdout.strip() == "8 files checked"
2747+
2748+
2749+
def _check_provenance_url(provenance_url: Dict[str, Any]) -> None:
2750+
assert "archive_info" in provenance_url
2751+
assert "url" in provenance_url
2752+
assert (
2753+
len(provenance_url) == 2
2754+
), "provenance_url.json should hold only archive_info and url keys"
2755+
2756+
assert "hashes" in provenance_url["archive_info"]
2757+
assert len(provenance_url["archive_info"]["hashes"]) > 0
2758+
2759+
2760+
@pytest.mark.parametrize(
2761+
"pkg_name, pkg_version, distribution",
2762+
[
2763+
pytest.param(
2764+
"simplewheel",
2765+
"1.0",
2766+
"simplewheel-1.0-py2.py3-none-any.whl",
2767+
id="wheel",
2768+
),
2769+
pytest.param(
2770+
"simple",
2771+
"1.0",
2772+
"simple-1.0.tar.gz",
2773+
id="sdist",
2774+
),
2775+
],
2776+
)
2777+
def test_install_provenance_url(
2778+
script: PipTestEnvironment,
2779+
data: TestData,
2780+
pkg_name: str,
2781+
pkg_version: str,
2782+
distribution: str,
2783+
) -> None:
2784+
"""Test installing a distribution from a simple API produces provenance_url.json."""
2785+
server = make_mock_server()
2786+
2787+
distribution_path = f"/files/{distribution}"
2788+
server.mock.side_effect = [
2789+
package_page(
2790+
{
2791+
distribution: distribution_path,
2792+
}
2793+
),
2794+
file_response(data.packages.joinpath(distribution)),
2795+
]
2796+
2797+
index_url = f"http://{server.host}:{server.port}"
2798+
2799+
pip_args = [
2800+
"install",
2801+
"-i",
2802+
index_url,
2803+
f"{pkg_name}=={pkg_version}",
2804+
]
2805+
with server_running(server):
2806+
result = script.pip(*pip_args)
2807+
2808+
result.assert_installed(
2809+
pkg_name=pkg_name, without_egg_link=True, editable=False
2810+
)
2811+
2812+
provenance_url_path = (
2813+
script.site_packages
2814+
/ f"{pkg_name}-{pkg_version}.dist-info"
2815+
/ "provenance_url.json"
2816+
)
2817+
2818+
assert result.files_created[
2819+
provenance_url_path
2820+
], "provenance_url.json was not created"
2821+
2822+
provenance_url_full_path = result.files_created[provenance_url_path].full
2823+
2824+
with open(provenance_url_full_path) as f:
2825+
provenance_url_content = json.load(f)
2826+
2827+
_check_provenance_url(provenance_url_content)
2828+
assert provenance_url_content["url"] == f"{index_url}{distribution_path}"
2829+
2830+
2831+
def test_install_provenance_url_cached(
2832+
script: PipTestEnvironment, data: TestData
2833+
) -> None:
2834+
"""Test installing a cached distribution produces provenance_url.json."""
2835+
pkg_name = "simple"
2836+
pkg_version = "1.0"
2837+
distribution = "simple-1.0.tar.gz"
2838+
2839+
server = make_mock_server()
2840+
2841+
distribution_path = f"/files/{distribution}"
2842+
server.mock.side_effect = [
2843+
package_page(
2844+
{
2845+
distribution: distribution_path,
2846+
}
2847+
),
2848+
file_response(data.packages.joinpath(distribution)),
2849+
] * 2
2850+
2851+
index_url = f"http://{server.host}:{server.port}"
2852+
2853+
pip_args = [
2854+
"install",
2855+
"-i",
2856+
index_url,
2857+
f"{pkg_name}=={pkg_version}",
2858+
]
2859+
2860+
with server_running(server):
2861+
result = script.pip(*pip_args)
2862+
2863+
result.assert_installed(
2864+
pkg_name=pkg_name, without_egg_link=True, editable=False
2865+
)
2866+
2867+
provenance_url_path = (
2868+
script.site_packages
2869+
/ f"{pkg_name}-{pkg_version}.dist-info"
2870+
/ "provenance_url.json"
2871+
)
2872+
2873+
assert result.files_created[
2874+
provenance_url_path
2875+
], "provenance_url.json was not created"
2876+
2877+
provenance_url_full_path = result.files_created[provenance_url_path].full
2878+
2879+
with open(provenance_url_full_path) as f:
2880+
provenance_url_content = json.load(f)
2881+
2882+
_check_provenance_url(provenance_url_content)
2883+
assert provenance_url_content["url"] == f"{index_url}{distribution_path}"
2884+
2885+
os.unlink(provenance_url_full_path)
2886+
2887+
pip_args.append("--ignore-installed")
2888+
result = script.pip(*pip_args)
2889+
2890+
assert f"Using cached {pkg_name}" in result.stdout
2891+
2892+
assert os.path.exists(provenance_url_full_path)
2893+
with open(provenance_url_full_path) as f:
2894+
new_provenance_url_content = json.load(f)
2895+
2896+
_check_provenance_url(new_provenance_url_content)
2897+
assert new_provenance_url_content == provenance_url_content

tests/unit/test_direct_url.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,26 @@ def test_to_json() -> None:
2828
)
2929

3030

31+
def test_to_json_no_keep_legacy_hash_key() -> None:
32+
direct_url = DirectUrl(
33+
url="https://pypi.org/simple/sample/sample-1.2.0-py3-none-any.whl",
34+
info=ArchiveInfo(
35+
hash="sha256=257ded4ea1fafa475f099e544b2d7560f674d42"
36+
"917e096d462e8a46a64f51245",
37+
hashes={
38+
"sha256": "257ded4ea1fafa475f099e544b2d7560f674d"
39+
"42917e096d462e8a46a64f51245",
40+
},
41+
),
42+
)
43+
direct_url.validate()
44+
assert direct_url.to_json(keep_legacy_hash_key=False) == (
45+
'{"archive_info": {"hashes": {'
46+
'"sha256": "257ded4ea1fafa475f099e544b2d7560f674d42917e096d462e8a46a64f51245"}'
47+
'}, "url": "https://pypi.org/simple/sample/sample-1.2.0-py3-none-any.whl"}'
48+
)
49+
50+
3151
def test_archive_info() -> None:
3252
direct_url_dict = {
3353
"url": "file:///home/user/archive.tgz",

0 commit comments

Comments
 (0)