From 4a77549711cd2a3d6b8db36f0cb59adfee3782c1 Mon Sep 17 00:00:00 2001 From: llyycchhee Date: Tue, 21 Oct 2025 04:47:42 +0000 Subject: [PATCH] feat(reranker): support jina-reranker-v3 --- doc/source/models/model_abilities/embed.rst | 8 +++++- xinference/model/rerank/model_spec.json | 27 +++++++++++++++++++ .../rerank/sentence_transformers/core.py | 12 +++++++-- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doc/source/models/model_abilities/embed.rst b/doc/source/models/model_abilities/embed.rst index 509d421bcd..29515fc19c 100644 --- a/doc/source/models/model_abilities/embed.rst +++ b/doc/source/models/model_abilities/embed.rst @@ -123,4 +123,10 @@ Does Embeddings API provides integration method for LangChain? ----------------------------------------------------------------------------------- Yes, you can refer to the related sections in LangChain's respective official Xinference documentation. -Here is the link: `Text Embedding Models: Xinference `_ \ No newline at end of file +Here is the link: `Text Embedding Models: Xinference `_ + + +Does Embeddings API support hybrid model? +----------------------------------------------------------------------------------- + +Yes, you can use ``flag`` as the engine to deploy the model and call Embeddings API by setting the extra parameter ``return_parse=True`` which will return sparse vectors. 
\ No newline at end of file diff --git a/xinference/model/rerank/model_spec.json b/xinference/model/rerank/model_spec.json index 5416df0dec..befb3459b8 100644 --- a/xinference/model/rerank/model_spec.json +++ b/xinference/model/rerank/model_spec.json @@ -154,6 +154,33 @@ "model_id": "jinaai/jina-reranker-v2-base-multilingual", "model_revision": "298e48cada4a9318650d7fbd795f63827f884087", "quantizations": ["none"] + }, + "modelscope": { + "model_id": "jinaai/jina-reranker-v2-base-multilingual", + "quantizations": ["none"] + } + } + } + ] + }, + { + "version": 2, + "model_name": "jina-reranker-v3", + "type": "normal", + "language": ["en", "zh", "multilingual"], + "max_tokens": 131072, + "model_specs": [ + { + "model_format": "pytorch", + "model_src": { + "huggingface": { + "model_id": "jinaai/jina-reranker-v3", + "model_revision": "7fa51ea4da62cb1b13ac263a1a41e20962a36c81", + "quantizations": ["none"] + }, + "modelscope": { + "model_id": "jinaai/jina-reranker-v3", + "quantizations": ["none"] } } } diff --git a/xinference/model/rerank/sentence_transformers/core.py b/xinference/model/rerank/sentence_transformers/core.py index ee57b06602..fabbb6e593 100644 --- a/xinference/model/rerank/sentence_transformers/core.py +++ b/xinference/model/rerank/sentence_transformers/core.py @@ -81,6 +81,7 @@ def load(self): if ( self.model_family.type == "normal" and "qwen3" not in self.model_family.model_name.lower() + and "jina-reranker-v3" not in self.model_family.model_name.lower() ): try: import sentence_transformers @@ -109,7 +110,10 @@ def load(self): ) if self._use_fp16: self._model.model.half() - elif "qwen3" in self.model_family.model_name.lower(): + elif ( + "qwen3" in self.model_family.model_name.lower() + or "jina-reranker-v3" in self.model_family.model_name.lower() + ): # qwen3-reranker # now we use transformers # TODO: support engines for rerank models @@ -225,6 +229,7 @@ def rerank( if ( self.model_family.type == "normal" and "qwen3" not in 
self.model_family.model_name.lower() + and "jina-reranker-v3" not in self.model_family.model_name.lower() ): logger.debug("Passing processed sentences: %s", sentence_combinations) similarity_scores = self._model.predict( @@ -235,7 +240,10 @@ def rerank( ).cpu() if similarity_scores.dtype == torch.bfloat16: similarity_scores = similarity_scores.float() - elif "qwen3" in self.model_family.model_name.lower(): + elif ( + "qwen3" in self.model_family.model_name.lower() + or "jina-reranker-v3" in self.model_family.model_name.lower() + ): def format_instruction(instruction, query, doc): if instruction is None: