Skip to content

Commit fd706c8

Browse files
maksbotan authored and
radoering committed
Cache git dependencies as wheels (#7473)
Currently, `poetry install` clones, builds, and installs every git dependency that is not already present in the environment. This is acceptable on developers' machines, but not in CI: there the environment is always fresh, so installing git dependencies takes significant time on every CI run, especially if a dependency has C extensions that need to be built. This commit builds a wheel for every git dependency that has a precise reference hash in the lock file and is not required to be installed in editable mode, stores that wheel in a cache directory, and installs from the cached wheel instead of cloning the repository again.
1 parent dfb4904 commit fd706c8

File tree

5 files changed

+205
-31
lines changed

5 files changed

+205
-31
lines changed

src/poetry/installation/chef.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ def prepare(
9494
return archive
9595

9696
if archive.is_dir():
97-
tmp_dir = tempfile.mkdtemp(prefix="poetry-chef-")
98-
return self._prepare(archive, Path(tmp_dir), editable=editable)
97+
destination = output_dir or Path(tempfile.mkdtemp(prefix="poetry-chef-"))
98+
return self._prepare(archive, destination=destination, editable=editable)
9999

100100
return self._prepare_sdist(archive, destination=output_dir)
101101

src/poetry/installation/executor.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ def _install(self, operation: Install | Update) -> int:
529529
cleanup_archive: bool = False
530530
if package.source_type == "git":
531531
archive = self._prepare_git_archive(operation)
532-
cleanup_archive = True
532+
cleanup_archive = operation.package.develop
533533
elif package.source_type == "file":
534534
archive = self._prepare_archive(operation)
535535
elif package.source_type == "directory":
@@ -584,7 +584,9 @@ def _remove(self, package: Package) -> int:
584584

585585
raise
586586

587-
def _prepare_archive(self, operation: Install | Update) -> Path:
587+
def _prepare_archive(
588+
self, operation: Install | Update, *, output_dir: Path | None = None
589+
) -> Path:
588590
package = operation.package
589591
operation_message = self.get_operation_message(operation)
590592

@@ -603,20 +605,35 @@ def _prepare_archive(self, operation: Install | Update) -> Path:
603605

604606
self._populate_hashes_dict(archive, package)
605607

606-
return self._chef.prepare(archive, editable=package.develop)
608+
return self._chef.prepare(
609+
archive, editable=package.develop, output_dir=output_dir
610+
)
607611

608612
def _prepare_git_archive(self, operation: Install | Update) -> Path:
609613
from poetry.vcs.git import Git
610614

611615
package = operation.package
616+
assert package.source_url is not None
617+
618+
if package.source_resolved_reference and not package.develop:
619+
# Only cache git archives when we know precise reference hash,
620+
# otherwise we might get stale archives
621+
cached_archive = self._artifact_cache.get_cached_archive_for_git(
622+
package.source_url,
623+
package.source_resolved_reference,
624+
package.source_subdirectory,
625+
env=self._env,
626+
)
627+
if cached_archive is not None:
628+
return cached_archive
629+
612630
operation_message = self.get_operation_message(operation)
613631

614632
message = (
615633
f" <fg=blue;options=bold>•</> {operation_message}: <info>Cloning...</info>"
616634
)
617635
self._write(operation, message)
618636

619-
assert package.source_url is not None
620637
source = Git.clone(
621638
url=package.source_url,
622639
source_root=self._env.path / "src",
@@ -627,10 +644,22 @@ def _prepare_git_archive(self, operation: Install | Update) -> Path:
627644
original_url = package.source_url
628645
package._source_url = str(source.path)
629646

630-
archive = self._prepare_archive(operation)
647+
output_dir = None
648+
if package.source_resolved_reference and not package.develop:
649+
output_dir = self._artifact_cache.get_cache_directory_for_git(
650+
original_url,
651+
package.source_resolved_reference,
652+
package.source_subdirectory,
653+
)
631654

655+
archive = self._prepare_archive(operation, output_dir=output_dir)
632656
package._source_url = original_url
633657

658+
if output_dir is not None and output_dir.is_dir():
659+
# Mark directories with cached git packages, to distinguish from
660+
# "normal" cache
661+
(output_dir / ".created_from_git_dependency").touch()
662+
634663
return archive
635664

636665
def _install_directory_without_wheel_installer(

src/poetry/utils/cache.py

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -231,35 +231,70 @@ def get_cache_directory_for_link(self, link: Link) -> Path:
231231
if link.subdirectory_fragment:
232232
key_parts["subdirectory"] = link.subdirectory_fragment
233233

234+
return self._get_directory_from_hash(key_parts)
235+
236+
def _get_directory_from_hash(self, key_parts: object) -> Path:
234237
key = hashlib.sha256(
235238
json.dumps(
236239
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
237240
).encode("ascii")
238241
).hexdigest()
239242

240243
split_key = [key[:2], key[2:4], key[4:6], key[6:]]
241-
242244
return self._cache_dir.joinpath(*split_key)
243245

246+
def get_cache_directory_for_git(
247+
self, url: str, ref: str, subdirectory: str | None
248+
) -> Path:
249+
key_parts = {"url": url, "ref": ref}
250+
if subdirectory:
251+
key_parts["subdirectory"] = subdirectory
252+
253+
return self._get_directory_from_hash(key_parts)
254+
244255
def get_cached_archive_for_link(
245256
self,
246257
link: Link,
247258
*,
248259
strict: bool,
249260
env: Env | None = None,
261+
) -> Path | None:
262+
cache_dir = self.get_cache_directory_for_link(link)
263+
264+
return self._get_cached_archive(
265+
cache_dir, strict=strict, filename=link.filename, env=env
266+
)
267+
268+
def get_cached_archive_for_git(
269+
self, url: str, reference: str, subdirectory: str | None, env: Env
270+
) -> Path | None:
271+
cache_dir = self.get_cache_directory_for_git(url, reference, subdirectory)
272+
273+
return self._get_cached_archive(cache_dir, strict=False, env=env)
274+
275+
def _get_cached_archive(
276+
self,
277+
cache_dir: Path,
278+
*,
279+
strict: bool,
280+
filename: str | None = None,
281+
env: Env | None = None,
250282
) -> Path | None:
251283
assert strict or env is not None
284+
# implication "strict -> filename should not be None"
285+
assert not strict or filename is not None
252286

253-
archives = self._get_cached_archives_for_link(link)
287+
archives = self._get_cached_archives(cache_dir)
254288
if not archives:
255289
return None
256290

257291
candidates: list[tuple[float | None, Path]] = []
292+
258293
for archive in archives:
259294
if strict:
260295
# in strict mode return the original cached archive instead of the
261296
# prioritized archive type.
262-
if link.filename == archive.name:
297+
if filename == archive.name:
263298
return archive
264299
continue
265300

@@ -286,9 +321,7 @@ def get_cached_archive_for_link(
286321

287322
return min(candidates)[1]
288323

289-
def _get_cached_archives_for_link(self, link: Link) -> list[Path]:
290-
cache_dir = self.get_cache_directory_for_link(link)
291-
324+
def _get_cached_archives(self, cache_dir: Path) -> list[Path]:
292325
archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
293326
paths: list[Path] = []
294327
for archive_type in archive_types:

0 commit comments

Comments
 (0)