diff --git a/docs/examples/Supported_Models.ipynb b/docs/examples/Supported_Models.ipynb
index c180b9ea..8c100d12 100644
--- a/docs/examples/Supported_Models.ipynb
+++ b/docs/examples/Supported_Models.ipynb
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -110,14 +110,22 @@
" \n",
"
\n",
" | 8 | \n",
+ " sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 | \n",
+ " 384 | \n",
+ " Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2 | \n",
+ " 0.46 | \n",
+ " {'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'} | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
" nomic-ai/nomic-embed-text-v1 | \n",
" 768 | \n",
" 8192 context length english model | \n",
" 0.54 | \n",
- " {'hf': 'xenova/nomic-embed-text-v1'} | \n",
+ " {'hf': 'nomic-ai/nomic-embed-text-v1'} | \n",
"
\n",
" \n",
- " | 9 | \n",
+ " 10 | \n",
" intfloat/multilingual-e5-large | \n",
" 1024 | \n",
" Multilingual model, e5-large. Recommend using this model for non-English languages | \n",
@@ -125,7 +133,7 @@
" {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} | \n",
"
\n",
" \n",
- " | 10 | \n",
+ " 11 | \n",
" sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | \n",
" 768 | \n",
" Sentence-transformers model for tasks like clustering or semantic search | \n",
@@ -133,7 +141,7 @@
" {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} | \n",
"
\n",
" \n",
- " | 11 | \n",
+ " 12 | \n",
" jinaai/jina-embeddings-v2-base-en | \n",
" 768 | \n",
" English embedding model supporting 8192 sequence length | \n",
@@ -141,7 +149,7 @@
" {'hf': 'xenova/jina-embeddings-v2-base-en'} | \n",
"
\n",
" \n",
- " | 12 | \n",
+ " 13 | \n",
" jinaai/jina-embeddings-v2-small-en | \n",
" 512 | \n",
" English embedding model supporting 8192 sequence length | \n",
@@ -162,11 +170,12 @@
"5 BAAI/bge-small-en-v1.5 384 \n",
"6 BAAI/bge-small-zh-v1.5 512 \n",
"7 sentence-transformers/all-MiniLM-L6-v2 384 \n",
- "8 nomic-ai/nomic-embed-text-v1 768 \n",
- "9 intfloat/multilingual-e5-large 1024 \n",
- "10 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
- "11 jinaai/jina-embeddings-v2-base-en 768 \n",
- "12 jinaai/jina-embeddings-v2-small-en 512 \n",
+ "8 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 384 \n",
+ "9 nomic-ai/nomic-embed-text-v1 768 \n",
+ "10 intfloat/multilingual-e5-large 1024 \n",
+ "11 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
+ "12 jinaai/jina-embeddings-v2-base-en 768 \n",
+ "13 jinaai/jina-embeddings-v2-small-en 512 \n",
"\n",
" description \\\n",
"0 Base English model \n",
@@ -177,11 +186,12 @@
"5 Fast and Default English model \n",
"6 Fast and recommended Chinese model \n",
"7 Sentence Transformer model, MiniLM-L6-v2 \n",
- "8 8192 context length english model \n",
- "9 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
- "10 Sentence-transformers model for tasks like clustering or semantic search \n",
- "11 English embedding model supporting 8192 sequence length \n",
+ "8 Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2 \n",
+ "9 8192 context length english model \n",
+ "10 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
+ "11 Sentence-transformers model for tasks like clustering or semantic search \n",
"12 English embedding model supporting 8192 sequence length \n",
+ "13 English embedding model supporting 8192 sequence length \n",
"\n",
" size_in_GB \\\n",
"0 0.50 \n",
@@ -192,11 +202,12 @@
"5 0.13 \n",
"6 0.10 \n",
"7 0.09 \n",
- "8 0.54 \n",
- "9 2.24 \n",
- "10 1.11 \n",
- "11 0.55 \n",
- "12 0.13 \n",
+ "8 0.46 \n",
+ "9 0.54 \n",
+ "10 2.24 \n",
+ "11 1.11 \n",
+ "12 0.55 \n",
+ "13 0.13 \n",
"\n",
" sources \n",
"0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} \n",
@@ -207,14 +218,15 @@
"5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n",
"6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n",
"7 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n",
- "8 {'hf': 'xenova/nomic-embed-text-v1'} \n",
- "9 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
- "10 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
- "11 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
- "12 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
+ "8 {'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'} \n",
+ "9 {'hf': 'nomic-ai/nomic-embed-text-v1'} \n",
+ "10 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
+ "11 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
+ "12 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
+ "13 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
]
},
- "execution_count": 4,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -244,7 +256,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.7"
+ "version": "3.11.4"
},
"orig_nbformat": 4
},
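
Note on the notebook above: the supported-models table is presumably regenerated by listing the registered models and loading them into a pandas DataFrame. A minimal sketch, assuming the `TextEmbedding` entry point and a `list_supported_models()` helper returning dicts with the columns shown in the output (model, dim, description, size_in_GB, sources); neither call is shown in this diff:

```python
# Minimal sketch of how the Supported_Models.ipynb output is presumably produced.
# Assumes TextEmbedding.list_supported_models() returns a list of dicts with the
# columns shown above (model, dim, description, size_in_GB, sources).
import pandas as pd
from fastembed import TextEmbedding

supported_models = TextEmbedding.list_supported_models()
pd.set_option("display.max_colwidth", None)  # keep long model names and source dicts readable
df = pd.DataFrame(supported_models)
print(df)
```

With the registry change below applied, the DataFrame gains row 8 for sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 and the later rows shift down by one, which is what the renumbering in the hunks above reflects.
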
diff --git a/fastembed/models.json b/fastembed/models.json
index e370c74a..f10bd3af 100644
--- a/fastembed/models.json
+++ b/fastembed/models.json
@@ -110,6 +110,16 @@
"https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz"
]
},
+ {
+ "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+ "dim": 384,
+ "description": "Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2",
+ "size_in_GB": 0.46,
+ "hf_sources": [
+ "qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q"
+ ],
+ "compressed_url_sources": []
+ },
{
"model": "xenova/multilingual-e5-large",
"dim": 1024,
diff --git a/fastembed/text/onnx_embedding.py b/fastembed/text/onnx_embedding.py
index 165f6eb4..1d194450 100644
--- a/fastembed/text/onnx_embedding.py
+++ b/fastembed/text/onnx_embedding.py
@@ -98,6 +98,15 @@
"hf": "qdrant/all-MiniLM-L6-v2-onnx",
},
},
+ {
+ "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+ "dim": 384,
+ "description": "Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2",
+ "size_in_GB": 0.46,
+ "sources": {
+ "hf": "qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q",
+ },
+ },
{
"model": "nomic-ai/nomic-embed-text-v1",
"dim": 768,
diff --git a/tests/test_onnx_embeddings.py b/tests/test_onnx_embeddings.py
index 7f580e6b..ea255af2 100644
--- a/tests/test_onnx_embeddings.py
+++ b/tests/test_onnx_embeddings.py
@@ -13,6 +13,7 @@
"BAAI/bge-base-en-v1.5": np.array([0.01129394, 0.05493144, 0.02615099, 0.00328772, 0.02996045]),
"BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
"sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]),
+ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": np.array([0.0094, 0.0184, 0.0328, 0.0072, -0.0351]),
"intfloat/multilingual-e5-large": np.array([0.0098, 0.0045, 0.0066, -0.0354, 0.0070]),
"xenova/multilingual-e5-large": np.array([0.00975464, 0.00446568, 0.00655449, -0.0354155, 0.00702112]),
"xenova/paraphrase-multilingual-mpnet-base-v2": np.array(
diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py
index 8f523adf..81bcbbd3 100644
--- a/tests/test_text_onnx_embeddings.py
+++ b/tests/test_text_onnx_embeddings.py
@@ -14,6 +14,7 @@
"BAAI/bge-large-en-v1.5": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
"BAAI/bge-large-en-v1.5-quantized": np.array([0.03434538, 0.03316108, 0.02191251, -0.03713358, -0.01577825]),
"sentence-transformers/all-MiniLM-L6-v2": np.array([0.0259, 0.0058, 0.0114, 0.0380, -0.0233]),
+ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": np.array([0.0094, 0.0184, 0.0328, 0.0072, -0.0351]),
"intfloat/multilingual-e5-large": np.array([0.0098, 0.0045, 0.0066, -0.0354, 0.0070]),
"sentence-transformers/paraphrase-multilingual-mpnet-base-v2": np.array(
[-0.01341097, 0.0416553, -0.00480805, 0.02844842, 0.0505299]
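
Both test files pin only the first five dimensions of the embedding for each model. A sketch of how such a canonical-vector check presumably works; the probe sentence and tolerance are assumptions, while the reference values are copied from the hunks above:

```python
# Illustrative canonical-vector check; the probe sentence and tolerance are
# assumptions, the reference values are taken from the test diffs above.
import numpy as np
from fastembed import TextEmbedding

CANONICAL_VECTOR_VALUES = {
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": np.array(
        [0.0094, 0.0184, 0.0328, 0.0072, -0.0351]
    ),
}

def check_canonical_vector(model_name: str, text: str = "hello world") -> None:
    expected = CANONICAL_VECTOR_VALUES[model_name]
    model = TextEmbedding(model_name=model_name)
    embedding = next(iter(model.embed([text])))
    # Only the leading dimensions are compared; the rest of the vector is unconstrained.
    assert np.allclose(embedding[: expected.shape[0]], expected, atol=1e-3)

check_canonical_vector("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
```
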