File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -345,6 +345,7 @@ def _wait_for_stages_ready(self, timeout: int = 120) -> None:
345345 )
346346
347347 suggestions = [
348+ f"Ignore this warning if the model weight download / load from disk time is longer than { timeout } s." ,
348349 "Verify GPU/device assignment in config (runtime.devices) is correct." ,
349350 "Check GPU/host memory availability; reduce model or batch size if needed." ,
350351 "Check model weights path and network reachability (if loading remotely)." ,
@@ -353,7 +354,7 @@ def _wait_for_stages_ready(self, timeout: int = 120) -> None:
353354
354355 formatted_suggestions = "\n " .join (f" { i + 1 } ) { msg } " for i , msg in enumerate (suggestions ))
355356
356- logger .error (f"[{ self ._name } ] Stage initialization failed . Troubleshooting Steps:\n { formatted_suggestions } " )
357+ logger .warning (f"[{ self ._name } ] Stage initialization timeout . Troubleshooting Steps:\n { formatted_suggestions } " )
357358
358359 def start_profile (self , stages : list [int ] | None = None ) -> None :
359360 """Start profiling for specified stages.
You can’t perform that action at this time.
0 commit comments