88
99import pytest
1010from haystack import Document , Pipeline
11- from haystack .components .embedders import SentenceTransformersTextEmbedder
11+ from haystack .components .embedders import OpenAITextEmbedder
1212from haystack .components .joiners .document_joiner import JoinMode
1313from haystack .core .component import component
1414from haystack .document_stores .types import FilterPolicy
@@ -25,93 +25,87 @@ def run(self, text: str, param_a: str = "default", param_b: str = "another_defau
2525
2626
2727class TestElasticsearchHybridRetriever :
28- serialised = { # noqa: RUF012
29- "type" : "haystack_integrations.components.retrievers.elasticsearch.elasticsearch_hybrid_retriever.ElasticsearchHybridRetriever" , # noqa: E501
30- "init_parameters" : {
31- "document_store" : {
32- "type" : "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore" ,
33- "init_parameters" : {
34- "hosts" : None ,
35- "custom_mapping" : None ,
36- "index" : "default" ,
37- "api_key" : {"type" : "env_var" , "env_vars" : ["ELASTIC_API_KEY" ], "strict" : False },
38- "api_key_id" : {"type" : "env_var" , "env_vars" : ["ELASTIC_API_KEY_ID" ], "strict" : False },
39- "embedding_similarity_function" : "cosine" ,
40- "sparse_vector_field" : None ,
41- "ingest_pipeline" : None ,
28+ @pytest .fixture (autouse = True )
29+ def openai_api_key (self , monkeypatch ):
30+ # the serde tests build a real OpenAITextEmbedder; haystack-ai 2.x resolves the key at init
31+ monkeypatch .setenv ("OPENAI_API_KEY" , "fake-key" )
32+
33+ @pytest .fixture
34+ def serialised (self ):
35+ return {
36+ "type" : "haystack_integrations.components.retrievers.elasticsearch.elasticsearch_hybrid_retriever.ElasticsearchHybridRetriever" , # noqa: E501
37+ "init_parameters" : {
38+ "document_store" : {
39+ "type" : (
40+ "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore"
41+ ),
42+ "init_parameters" : {
43+ "hosts" : None ,
44+ "custom_mapping" : None ,
45+ "index" : "default" ,
46+ "api_key" : {"type" : "env_var" , "env_vars" : ["ELASTIC_API_KEY" ], "strict" : False },
47+ "api_key_id" : {"type" : "env_var" , "env_vars" : ["ELASTIC_API_KEY_ID" ], "strict" : False },
48+ "embedding_similarity_function" : "cosine" ,
49+ "sparse_vector_field" : None ,
50+ "ingest_pipeline" : None ,
51+ },
4252 },
43- },
44- "embedder" : {
45- "type" : "haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder" , # noqa: E501
46- "init_parameters" : {
47- "model" : "sentence-transformers/all-mpnet-base-v2" ,
48- "token" : {"type" : "env_var" , "env_vars" : ["HF_API_TOKEN" , "HF_TOKEN" ], "strict" : False },
49- "prefix" : "" ,
50- "suffix" : "" ,
51- "local_files_only" : False ,
52- "batch_size" : 32 ,
53- "progress_bar" : True ,
54- "normalize_embeddings" : False ,
55- "trust_remote_code" : False ,
56- "truncate_dim" : None ,
57- "model_kwargs" : None ,
58- "tokenizer_kwargs" : None ,
59- "config_kwargs" : None ,
60- "precision" : "float32" ,
61- "encode_kwargs" : None ,
62- "backend" : "torch" ,
53+ "embedder" : {
54+ "type" : "haystack.components.embedders.openai_text_embedder.OpenAITextEmbedder" ,
55+ "init_parameters" : {
56+ "api_key" : {"type" : "env_var" , "env_vars" : ["OPENAI_API_KEY" ], "strict" : True },
57+ "model" : "text-embedding-ada-002" ,
58+ "dimensions" : None ,
59+ "api_base_url" : None ,
60+ "organization" : None ,
61+ "prefix" : "" ,
62+ "suffix" : "" ,
63+ "timeout" : None ,
64+ "max_retries" : None ,
65+ "http_client_kwargs" : None ,
66+ },
6367 },
68+ "filters_bm25" : None ,
69+ "fuzziness" : "AUTO" ,
70+ "top_k_bm25" : 10 ,
71+ "scale_score" : False ,
72+ "filter_policy_bm25" : "replace" ,
73+ "filters_embedding" : None ,
74+ "top_k_embedding" : 10 ,
75+ "num_candidates" : None ,
76+ "filter_policy_embedding" : "replace" ,
77+ "join_mode" : "reciprocal_rank_fusion" ,
78+ "weights" : None ,
79+ "top_k" : None ,
80+ "sort_by_score" : True ,
6481 },
65- "filters_bm25" : None ,
66- "fuzziness" : "AUTO" ,
67- "top_k_bm25" : 10 ,
68- "scale_score" : False ,
69- "filter_policy_bm25" : "replace" ,
70- "filters_embedding" : None ,
71- "top_k_embedding" : 10 ,
72- "num_candidates" : None ,
73- "filter_policy_embedding" : "replace" ,
74- "join_mode" : "reciprocal_rank_fusion" ,
75- "weights" : None ,
76- "top_k" : None ,
77- "sort_by_score" : True ,
78- },
79- }
82+ }
8083
8184 @pytest .fixture
8285 def mock_embedder (self ):
8386 return MockedTextEmbedder ()
8487
8588 @patch ("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch" )
86- def test_to_dict (self , _mock_elasticsearch_client ) -> None :
89+ def test_to_dict (self , _mock_elasticsearch_client , serialised ) -> None :
8790 doc_store = ElasticsearchDocumentStore ()
88- embedder = SentenceTransformersTextEmbedder () # we use actual embedder here for the de/serialization
91+ embedder = OpenAITextEmbedder () # we use actual embedder here for the de/serialization
8992 hybrid_retriever = ElasticsearchHybridRetriever (document_store = doc_store , embedder = embedder )
9093 result = hybrid_retriever .to_dict ()
9194
92- result ["init_parameters" ]["embedder" ]["init_parameters" ].pop ("device" , None )
93-
94- expected = deepcopy (self .serialised )
95- # revision was added in Haystack 2.20.0; include it in expected if present in result
96- if "revision" in result ["init_parameters" ]["embedder" ]["init_parameters" ]:
97- expected ["init_parameters" ]["embedder" ]["init_parameters" ]["revision" ] = None
98-
99- assert result == expected
95+ assert result == serialised
10096
10197 @patch ("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch" )
102- def test_from_dict (self , _mock_elasticsearch_client ):
103- data = deepcopy (self . serialised )
98+ def test_from_dict (self , _mock_elasticsearch_client , serialised ):
99+ data = deepcopy (serialised )
104100 deserialized = ElasticsearchHybridRetriever .from_dict (data )
105101 assert isinstance (deserialized , ElasticsearchHybridRetriever )
106102 result = deserialized .to_dict ()
107- result ["init_parameters" ]["embedder" ]["init_parameters" ].pop ("device" , None )
108- result ["init_parameters" ]["embedder" ]["init_parameters" ].pop ("revision" , None )
109- assert result == self .serialised
103+ assert result == serialised
110104
111105 @patch ("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch" )
112106 def test_to_dict_with_extra_args (self , _mock_elasticsearch_client ):
113107 doc_store = ElasticsearchDocumentStore ()
114- embedder = SentenceTransformersTextEmbedder ()
108+ embedder = OpenAITextEmbedder ()
115109 hybrid_retriever = ElasticsearchHybridRetriever (
116110 document_store = doc_store ,
117111 embedder = embedder ,
@@ -132,7 +126,7 @@ def test_to_dict_with_extra_args(self, _mock_elasticsearch_client):
132126 @patch ("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch" )
133127 def test_from_dict_with_extra_args (self , _mock_elasticsearch_client ):
134128 doc_store = ElasticsearchDocumentStore ()
135- embedder = SentenceTransformersTextEmbedder ()
129+ embedder = OpenAITextEmbedder ()
136130 hybrid_retriever = ElasticsearchHybridRetriever (
137131 document_store = doc_store ,
138132 embedder = embedder ,
@@ -154,7 +148,7 @@ def test_from_dict_with_extra_args(self, _mock_elasticsearch_client):
154148 @patch ("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch" )
155149 def test_to_dict_with_enum_filter_policies (self , _mock_elasticsearch_client ):
156150 doc_store = ElasticsearchDocumentStore ()
157- embedder = SentenceTransformersTextEmbedder ()
151+ embedder = OpenAITextEmbedder ()
158152 hybrid_retriever = ElasticsearchHybridRetriever (
159153 document_store = doc_store ,
160154 embedder = embedder ,
@@ -169,7 +163,7 @@ def test_to_dict_with_enum_filter_policies(self, _mock_elasticsearch_client):
169163 @patch ("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch" )
170164 def test_to_dict_with_enum_join_mode (self , _mock_elasticsearch_client ):
171165 doc_store = ElasticsearchDocumentStore ()
172- embedder = SentenceTransformersTextEmbedder ()
166+ embedder = OpenAITextEmbedder ()
173167 hybrid_retriever = ElasticsearchHybridRetriever (
174168 document_store = doc_store ,
175169 embedder = embedder ,
0 commit comments