diff --git a/invokeai/backend/model_manager/model_on_disk.py b/invokeai/backend/model_manager/model_on_disk.py
index a77853c8f3d..284c4998589 100644
--- a/invokeai/backend/model_manager/model_on_disk.py
+++ b/invokeai/backend/model_manager/model_on_disk.py
@@ -84,6 +84,9 @@ def load_state_dict(self, path: Optional[Path] = None) -> StateDict:
         path = self.resolve_weight_file(path)
 
+        if path in self._state_dict_cache:
+            return self._state_dict_cache[path]
+
         with SilenceWarnings():
             if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                 scan_result = scan_file_path(path)
diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 178c0508466..de210d3da8d 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -1,3 +1,4 @@
+import gc
 from pathlib import Path
 
 import gguf
@@ -5,18 +6,48 @@
 
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
+from invokeai.backend.util.logging import InvokeAILogger
+
+logger = InvokeAILogger.get_logger()
+
+
+class WrappedGGUFReader:
+    """Wrapper around GGUFReader that adds a close() method."""
+
+    def __init__(self, path: Path):
+        self.reader = gguf.GGUFReader(path)
+
+    def __enter__(self):
+        return self.reader
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        return False
+
+    def close(self):
+        """Explicitly close the memory-mapped file."""
+        if hasattr(self.reader, "data"):
+            try:
+                self.reader.data.flush()
+                del self.reader.data
+            except (AttributeError, OSError, ValueError) as e:
+                logger.warning(f"Wasn't able to close GGUF memory map: {e}")
+        del self.reader
+        gc.collect()
 
 
 def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTensor]:
-    reader = gguf.GGUFReader(path)
-
-    sd: dict[str, GGMLTensor] = {}
-    for tensor in reader.tensors:
-        torch_tensor = torch.from_numpy(tensor.data)
-        shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
-        if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
-            torch_tensor = torch_tensor.view(*shape)
-        sd[tensor.name] = GGMLTensor(
-            torch_tensor, ggml_quantization_type=tensor.tensor_type, tensor_shape=shape, compute_dtype=compute_dtype
-        )
-    return sd
+    with WrappedGGUFReader(path) as reader:
+        sd: dict[str, GGMLTensor] = {}
+        for tensor in reader.tensors:
+            torch_tensor = torch.from_numpy(tensor.data)
+            shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
+            if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
+                torch_tensor = torch_tensor.view(*shape)
+            sd[tensor.name] = GGMLTensor(
+                torch_tensor,
+                ggml_quantization_type=tensor.tensor_type,
+                tensor_shape=shape,
+                compute_dtype=compute_dtype,
+            )
+        return sd
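
For reviewers, a minimal usage sketch of the new wrapper (not part of the diff; the file path below is a hypothetical example):

# Sketch only: exercises WrappedGGUFReader and gguf_sd_loader as introduced above.
from pathlib import Path

import torch

from invokeai.backend.quantization.gguf.loaders import WrappedGGUFReader, gguf_sd_loader

path = Path("models/example.gguf")  # hypothetical path to a local GGUF file

# As a context manager: the underlying memory map is released when the block exits,
# instead of lingering until the GGUFReader happens to be garbage-collected.
with WrappedGGUFReader(path) as reader:
    print(f"{len(reader.tensors)} tensors in file")

# Or indirectly via gguf_sd_loader, which now wraps the reader internally
# and returns a state dict of GGMLTensors.
sd = gguf_sd_loader(path, compute_dtype=torch.bfloat16)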