diff --git a/doc/source/models/model_abilities/embed.rst b/doc/source/models/model_abilities/embed.rst index 509d421bcd..29515fc19c 100644 --- a/doc/source/models/model_abilities/embed.rst +++ b/doc/source/models/model_abilities/embed.rst @@ -123,4 +123,10 @@ Does Embeddings API provides integration method for LangChain? ----------------------------------------------------------------------------------- Yes, you can refer to the related sections in LangChain's respective official Xinference documentation. -Here is the link: `Text Embedding Models: Xinference `_ \ No newline at end of file +Here is the link: `Text Embedding Models: Xinference `_ + + +Does Embeddings API support hybrid model? +----------------------------------------------------------------------------------- + +Yes, you can use ``flag`` as the engine to deploy the model and call Embeddings API by setting the extra parameter ``return_sparse=True`` which will return sparse vectors. \ No newline at end of file diff --git a/xinference/model/rerank/sentence_transformers/core.py b/xinference/model/rerank/sentence_transformers/core.py index ee57b06602..fabbb6e593 100644 --- a/xinference/model/rerank/sentence_transformers/core.py +++ b/xinference/model/rerank/sentence_transformers/core.py @@ -81,6 +81,7 @@ def load(self): if ( self.model_family.type == "normal" and "qwen3" not in self.model_family.model_name.lower() + and "jina-reranker-v3" not in self.model_family.model_name.lower() ): try: import sentence_transformers @@ -109,7 +110,10 @@ def load(self): ) if self._use_fp16: self._model.model.half() - elif "qwen3" in self.model_family.model_name.lower(): + elif ( + "qwen3" in self.model_family.model_name.lower() + or "jina-reranker-v3" in self.model_family.model_name.lower() + ): # qwen3-reranker # now we use transformers # TODO: support engines for rerank models @@ -225,6 +229,7 @@ def rerank( if ( self.model_family.type == "normal" and "qwen3" not in self.model_family.model_name.lower() + and 
"jina-reranker-v3" not in self.model_family.model_name.lower() ): logger.debug("Passing processed sentences: %s", sentence_combinations) similarity_scores = self._model.predict( @@ -235,7 +240,10 @@ def rerank( ).cpu() if similarity_scores.dtype == torch.bfloat16: similarity_scores = similarity_scores.float() - elif "qwen3" in self.model_family.model_name.lower(): + elif ( + "qwen3" in self.model_family.model_name.lower() + or "jina-reranker-v3" in self.model_family.model_name.lower() + ): def format_instruction(instruction, query, doc): if instruction is None: