Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions xinference/core/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,10 +817,7 @@ def _create_virtual_env_manager(
# Specify python_path explicitly; otherwise uv
# sometimes picks up a different Python version.
python_path = pathlib.Path(sys.executable)
kw = {}
if XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED:
kw["skip_installed"] = XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED
virtual_env_manager.create_env(python_path=python_path, **kw)
virtual_env_manager.create_env(python_path=python_path)
return virtual_env_manager

@classmethod
Expand All @@ -847,6 +844,8 @@ def _prepare_virtual_env(
packages.extend(virtual_env_packages)
conf.pop("packages", None)
conf.pop("inherit_pip_config", None)
if XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED:
conf["skip_installed"] = XINFERENCE_VIRTUAL_ENV_SKIP_INSTALLED

logger.info(
"Installing packages %s in virtual env %s, with settings(%s)",
Expand Down
30 changes: 30 additions & 0 deletions xinference/model/image/model_spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,36 @@
"no_build_isolation": true
}
},
{
"version": 2,
"model_name": "Qwen-Image",
"model_family": "stable_diffusion",
"model_ability": [
"text2image"
],
"model_src": {
"huggingface": {
"model_id": "Qwen/Qwen-Image",
"model_revision": "4516c4d3058302ff35cd86c62ffa645d039fefad"
},
"modelscope": {
"model_id": "Qwen/Qwen-Image",
"model_revision": "master"
}
},
"default_model_config": {
"quantize": true,
"quantize_text_encoder": "text_encoder",
"torch_dtype": "bfloat16"
},
"virtualenv": {
"packages": [
"git+https://github.com/huggingface/diffusers.git",
"#system_numpy#"
],
"no_build_isolation": true
}
},
{
"version": 2,
"model_name": "sd3-medium",
Expand Down
22 changes: 22 additions & 0 deletions xinference/model/image/stable_diffusion/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ def load(self):
self._model = FluxKontextPipeline.from_pretrained(
self._model_path, **self._kwargs
)
elif "qwen" in self._model_spec.model_name.lower():
# TODO: remove this branch when auto pipeline supports
# Qwen-Image
from diffusers import DiffusionPipeline

self._model = DiffusionPipeline.from_pretrained(
self._model_path, **self._kwargs
)
else:
raise
self._load_to_device(self._model)
Expand Down Expand Up @@ -348,11 +356,19 @@ def _quantize_text_encoder(self, quantize_text_encoder: Optional[str]):
return

if not quantize_text_encoder:
logger.debug("No text encoder quantization")
return

quantization_method = self._kwargs.pop("text_encoder_quantize_method", "bnb")
quantization = self._kwargs.pop("text_encoder_quantization", "8-bit")

logger.debug(
"Quantize text encoder %s with method %s, quantization %s",
quantize_text_encoder,
quantization_method,
quantization,
)

torch_dtype = self._torch_dtype
for text_encoder_name in quantize_text_encoder.split(","):
quantization_kwargs: Dict[str, Any] = {}
Expand Down Expand Up @@ -389,8 +405,13 @@ def _quantize_transformer(self):

if not quantization:
# skip if no quantization specified
logger.debug("No transformer quantization")
return

logger.debug(
"Quantize transformer with %s, quantization %s", method, quantization
)

torch_dtype = self._torch_dtype
transformer_cls = self._get_layer_cls("transformer")
quantization_config = self._get_quantize_config(
Expand All @@ -409,6 +430,7 @@ def _quantize_transformer_gguf(self):

# GGUF transformer
torch_dtype = self._torch_dtype
logger.debug("Quantize transformer with gguf file %s", self._gguf_model_path)
self._kwargs["transformer"] = self._get_layer_cls(
"transformer"
).from_single_file(
Expand Down
Loading