Skip to content

Commit 0118da1

Browse files
committed
add prefetching of index in legacy repositories
1 parent a9d05f5 commit 0118da1

File tree

6 files changed

+127
-2
lines changed

6 files changed

+127
-2
lines changed

docs/repositories.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,18 @@ default = true
170170
```
171171

172172
A default source will also be the fallback source if you add other sources.
173+
174+
### Enabling indexing for PEP503 repositories
175+
176+
Repositories following the [PEP 503](https://peps.python.org/pep-0503/) specification should expose a
177+
root page with an individual link for each package they serve. This isn't reliably implemented
178+
everywhere, so Poetry cannot rely on it by default and has to query each repository for every package, which leads to increased network traffic and slower resolution times. If you're using a
179+
repository which has a valid listing, you can add the `indexed` property to let Poetry prefetch and
180+
cache this package list.
181+
182+
```toml
183+
[[tool.poetry.source]]
184+
name = "foo"
185+
url = "https://foo.bar/simple/"
186+
indexed = true
187+
```

src/poetry/factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,15 @@ def create_legacy_repository(
168168
raise RuntimeError("Missing [name] in source.")
169169
name = source["name"]
170170
url = source["url"]
171+
indexed = bool(source.get("indexed", False))
171172

172173
return LegacyRepository(
173174
name,
174175
url,
175176
config=auth_config,
176177
cert=get_cert(auth_config, name),
177178
client_cert=get_client_cert(auth_config, name),
179+
indexed=indexed,
178180
)
179181

180182
@classmethod

src/poetry/repositories/legacy_repository.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from poetry.inspection.info import PackageInfo
99
from poetry.repositories.exceptions import PackageNotFound
1010
from poetry.repositories.http import HTTPRepository
11+
from poetry.repositories.link_sources.html import SimpleIndexPage
1112
from poetry.repositories.link_sources.html import SimpleRepositoryPage
1213
from poetry.utils.helpers import canonicalize_name
1314

@@ -30,6 +31,7 @@ def __init__(
3031
disable_cache: bool = False,
3132
cert: Path | None = None,
3233
client_cert: Path | None = None,
34+
indexed: bool = False,
3335
) -> None:
3436
if name == "pypi":
3537
raise ValueError("The name [pypi] is reserved for repositories")
@@ -38,6 +40,10 @@ def __init__(
3840
name, url.rstrip("/"), config, disable_cache, cert, client_cert
3941
)
4042

43+
self._index_page = None
44+
if indexed:
45+
self._index_page = self._get_index_page()
46+
4147
def find_packages(self, dependency: Dependency) -> list[Package]:
4248
packages = []
4349
constraint, allow_prereleases = self._get_constraints_from_dependency(
@@ -53,6 +59,11 @@ def find_packages(self, dependency: Dependency) -> list[Package]:
5359
if self._cache.store("matches").has(key):
5460
versions = self._cache.store("matches").get(key)
5561
else:
62+
if self._index_page is not None and not self._index_page.serves_package(
63+
dependency.name
64+
):
65+
return []
66+
5667
page = self._get_page(f"/{dependency.name.replace('.', '-')}/")
5768
if page is None:
5869
return []
@@ -152,3 +163,9 @@ def _get_page(self, endpoint: str) -> SimpleRepositoryPage | None:
152163
if not response:
153164
return None
154165
return SimpleRepositoryPage(response.url, response.text)
166+
167+
def _get_index_page(self) -> SimpleIndexPage | None:
    """Fetch the repository root and wrap it in a ``SimpleIndexPage``.

    The root is requested through ``self._get_response`` with an empty
    endpoint. A falsy response yields ``None``; otherwise the response URL
    and text are handed to ``SimpleIndexPage``.
    """
    root = self._get_response("")
    return SimpleIndexPage(root.url, root.text) if root else None

src/poetry/repositories/link_sources/html.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,30 @@ def __init__(self, url: str, content: str) -> None:
4343
if not url.endswith("/"):
4444
url += "/"
4545
super().__init__(url=url, content=content)
46+
47+
48+
class SimpleIndexPage:
    """Describes the root page of a PEP 503 compliant repository.

    The root page contains a list of anchors, each one corresponding to a
    served project. The project names are collected once at construction
    time so that ``serves_package`` is a plain set lookup.
    """

    def __init__(self, url: str, content: str) -> None:
        """Parse *content* and cache the normalized project names.

        :param url: URL the page was fetched from; a trailing slash is
            appended when missing.
        :param content: Raw HTML of the root page.
        """
        if not url.endswith("/"):
            url += "/"

        self._url = url
        self._content = content
        self._parsed = html5lib.parse(content, namespaceHTMLElements=False)
        self._cached_packages = set(self.links)

    @staticmethod
    def _normalize(name: str) -> str:
        """Return the form of *name* used for lookups.

        PEP 503 states that names compare case-insensitively and that runs
        of ``-``, ``_`` and ``.`` are equivalent. ``_`` is kept as the
        canonical separator so that names without dots or repeated
        separators normalize exactly as they did before.
        """
        # Local import: the module-level import block is not part of this
        # block, so the dependency is brought into scope here.
        import re

        return re.sub(r"[-_.]+", "_", name).lower()

    @property
    def links(self) -> Iterator[str]:
        """Yield the normalized name of every project linked from the page.

        Anchors without any text are skipped instead of raising, and text
        nested inside child elements of the anchor is taken into account.
        """
        for anchor in self._parsed.findall(".//a"):
            # ``anchor.text`` is None for empty anchors and for anchors whose
            # text lives in a child element; ``itertext`` covers both cases.
            text = "".join(anchor.itertext()).strip()
            if text:
                yield self._normalize(text)

    def serves_package(self, name: str) -> bool:
        """Return whether the index lists a project equivalent to *name*."""
        return self._normalize(name) in self._cached_packages
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<a href="pyyaml/">pyyaml</a>
2+
<a href="missing-version/">missing-version</a>
3+
<a href="black/">black</a>

tests/repositories/test_legacy_repository.py

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from poetry.repositories.exceptions import PackageNotFound
1515
from poetry.repositories.exceptions import RepositoryError
1616
from poetry.repositories.legacy_repository import LegacyRepository
17+
from poetry.repositories.link_sources.html import SimpleIndexPage
1718
from poetry.repositories.link_sources.html import SimpleRepositoryPage
1819

1920

@@ -32,8 +33,10 @@ class MockRepository(LegacyRepository):
3233

3334
FIXTURES = Path(__file__).parent / "fixtures" / "legacy"
3435

35-
def __init__(self) -> None:
36-
super().__init__("legacy", url="http://legacy.foo.bar", disable_cache=True)
36+
def __init__(self, indexed: bool = False) -> None:
37+
super().__init__(
38+
"legacy", url="http://legacy.foo.bar", disable_cache=True, indexed=indexed
39+
)
3740

3841
def _get_page(self, endpoint: str) -> SimpleRepositoryPage | None:
3942
parts = endpoint.split("/")
@@ -338,6 +341,64 @@ def test_get_package_retrieves_packages_with_no_hashes():
338341
] == package.files
339342

340343

344+
def test_unindexed_has_no_root_page():
    """A repository built without ``indexed`` holds no index page."""
    unindexed = MockRepository()
    assert not unindexed._index_page
347+
348+
349+
class MockIndexedRepository(MockRepository):
    """``MockRepository`` variant that is always created with ``indexed=True``.

    The index page is built from the ``index.html`` fixture instead of
    going through the parent implementation.
    """

    def __init__(self) -> None:
        super().__init__(indexed=True)

    def _get_index_page(self) -> SimpleIndexPage | None:
        """Return the fixture-backed index page, or ``None`` if it is absent."""
        index_file = self.FIXTURES / "index.html"
        if index_file.exists():
            with index_file.open(encoding="utf-8") as handle:
                return SimpleIndexPage(self._url + "/", handle.read())
        return None
360+
361+
362+
def test_indexed_has_root_page():
    """An indexed repository exposes a truthy index page."""
    indexed = MockIndexedRepository()
    assert indexed._index_page
365+
366+
367+
def test_indexed_root_page_has_valid_content():
    """The index page reports a package that is listed in the fixture."""
    index_page = MockIndexedRepository()._index_page
    assert index_page.serves_package("pyyaml")
370+
371+
372+
def test_indexed_fails_on_missing():
    """Looking up a package absent from the index yields no packages."""
    indexed = MockIndexedRepository()
    missing = Factory.create_dependency("this-doesnt-exist", "*")

    found = indexed.find_packages(missing)

    assert found == []
378+
379+
380+
def test_indexed_succeeds_on_existing():
    """Looking up a package present in the index returns its single match."""
    indexed = MockIndexedRepository()
    existing = Factory.create_dependency("pyyaml", "*")

    found = indexed.find_packages(existing)

    assert len(found) == 1
386+
387+
388+
def test_indexed_pep426_underscore_hyphen():
    """Underscores and hyphens are interchangeable when querying the index."""
    index_page = MockIndexedRepository()._index_page

    # The fixture lists the project as 'missing-version'.
    assert index_page.serves_package("missing_version")
393+
394+
395+
def test_indexed_pep426_case_insensitive():
    """Index lookups ignore the case of the requested name."""
    index_page = MockIndexedRepository()._index_page

    # The fixture lists the project as 'black'.
    assert index_page.serves_package("Black")
400+
401+
341402
class MockHttpRepository(LegacyRepository):
342403
def __init__(self, endpoint_responses: dict, http: type[httpretty.httpretty]):
343404
base_url = "http://legacy.foo.bar"

0 commit comments

Comments
 (0)