Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions lalamo/model_import/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,18 +102,38 @@ def download_file(
) -> Path:
if progress_callback is not None:
progress_callback(DownloadingFileEvent(file_spec))
result = huggingface_hub.hf_hub_download(
repo_id=file_spec.repo or model_repo,
local_dir=output_dir,
filename=file_spec.filename,
)
try:
result = huggingface_hub.hf_hub_download(
repo_id=file_spec.repo or model_repo,
local_dir=output_dir,
filename=file_spec.filename,
)
except (huggingface_hub.errors.OfflineModeIsEnabled, OSError):
result = huggingface_hub.try_to_load_from_cache(
repo_id=file_spec.repo or model_repo,
filename=file_spec.filename,
)
if result is None or isinstance(result, str) and not Path(result).exists():
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Handle _CACHED_NO_EXIST before converting cache result to Path

`try_to_load_from_cache` can return the `_CACHED_NO_EXIST` sentinel (not just `None` or a path string), but the current guard only rejects `None` and string paths that no longer exist on disk. When the sentinel is returned, the guard does not fire and the code falls through to `Path(result)`, which raises `TypeError` and breaks offline imports for caches that recorded the file as missing (for example, after an earlier lookup found it absent from the repo). Treat the sentinel — or any other non-string return — as a cache miss and re-raise the original download error instead, e.g. `if not isinstance(result, str) or not Path(result).exists(): raise`.

Useful? React with 👍 / 👎.

raise
if progress_callback is not None:
progress_callback(FinishedDownloadingFileEvent(file_spec))
return Path(result)


def _list_cached_repo_files(model_repo: str) -> list[str]:
"""List files available in the local HuggingFace cache for a given repo."""
cache_info = huggingface_hub.scan_cache_dir()
for repo_info in cache_info.repos:
if repo_info.repo_id == model_repo:
return [f.file_name for rev in repo_info.revisions for f in rev.files]
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Limit offline weight discovery to the active cached revision

The offline fallback unions files from all cached revisions (`for rev in repo_info.revisions for f in rev.files`), so `list_weight_files` can include stale shard names that no longer exist in the current `main` revision. `download_weights` will then try to fetch those stale files and fail offline, even when the latest cached snapshot is otherwise usable. The fallback should take files from a single target revision (e.g., the cached commit that `main` points to) rather than combining every revision.

Useful? React with 👍 / 👎.

return []


def list_weight_files(model_repo: str, weights_type: WeightsType) -> list[FileSpec]:
all_files = huggingface_hub.list_repo_files(model_repo)
try:
all_files = huggingface_hub.list_repo_files(model_repo)
except (huggingface_hub.errors.OfflineModeIsEnabled, OSError):
all_files = _list_cached_repo_files(model_repo)
match weights_type:
case WeightsType.SAFETENSORS:
return [FileSpec(filename) for filename in all_files if filename.endswith(".safetensors")]
Expand Down
Loading