Skip to content

Commit aacc089

Browse files
julian-risch, claude, bogdankostic
authored
fix: adapt optimum, ragas, elasticsearch, opensearch to APIs removed in Haystack 3.0 (#3538)
Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
Co-authored-by: bogdankostic <bogdankostic@web.de>
1 parent: 7f469ff; commit: aacc089

4 files changed

Lines changed: 175 additions & 163 deletions

File tree

integrations/elasticsearch/tests/test_hybrid_retriever.py

Lines changed: 63 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
import pytest
1010
from haystack import Document, Pipeline
11-
from haystack.components.embedders import SentenceTransformersTextEmbedder
11+
from haystack.components.embedders import OpenAITextEmbedder
1212
from haystack.components.joiners.document_joiner import JoinMode
1313
from haystack.core.component import component
1414
from haystack.document_stores.types import FilterPolicy
@@ -25,93 +25,87 @@ def run(self, text: str, param_a: str = "default", param_b: str = "another_defau
2525

2626

2727
class TestElasticsearchHybridRetriever:
28-
serialised = { # noqa: RUF012
29-
"type": "haystack_integrations.components.retrievers.elasticsearch.elasticsearch_hybrid_retriever.ElasticsearchHybridRetriever", # noqa: E501
30-
"init_parameters": {
31-
"document_store": {
32-
"type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
33-
"init_parameters": {
34-
"hosts": None,
35-
"custom_mapping": None,
36-
"index": "default",
37-
"api_key": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY"], "strict": False},
38-
"api_key_id": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY_ID"], "strict": False},
39-
"embedding_similarity_function": "cosine",
40-
"sparse_vector_field": None,
41-
"ingest_pipeline": None,
28+
@pytest.fixture(autouse=True)
29+
def openai_api_key(self, monkeypatch):
30+
# the serde tests build a real OpenAITextEmbedder; haystack-ai 2.x resolves the key at init
31+
monkeypatch.setenv("OPENAI_API_KEY", "fake-key")
32+
33+
@pytest.fixture
34+
def serialised(self):
35+
return {
36+
"type": "haystack_integrations.components.retrievers.elasticsearch.elasticsearch_hybrid_retriever.ElasticsearchHybridRetriever", # noqa: E501
37+
"init_parameters": {
38+
"document_store": {
39+
"type": (
40+
"haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore"
41+
),
42+
"init_parameters": {
43+
"hosts": None,
44+
"custom_mapping": None,
45+
"index": "default",
46+
"api_key": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY"], "strict": False},
47+
"api_key_id": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY_ID"], "strict": False},
48+
"embedding_similarity_function": "cosine",
49+
"sparse_vector_field": None,
50+
"ingest_pipeline": None,
51+
},
4252
},
43-
},
44-
"embedder": {
45-
"type": "haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder", # noqa: E501
46-
"init_parameters": {
47-
"model": "sentence-transformers/all-mpnet-base-v2",
48-
"token": {"type": "env_var", "env_vars": ["HF_API_TOKEN", "HF_TOKEN"], "strict": False},
49-
"prefix": "",
50-
"suffix": "",
51-
"local_files_only": False,
52-
"batch_size": 32,
53-
"progress_bar": True,
54-
"normalize_embeddings": False,
55-
"trust_remote_code": False,
56-
"truncate_dim": None,
57-
"model_kwargs": None,
58-
"tokenizer_kwargs": None,
59-
"config_kwargs": None,
60-
"precision": "float32",
61-
"encode_kwargs": None,
62-
"backend": "torch",
53+
"embedder": {
54+
"type": "haystack.components.embedders.openai_text_embedder.OpenAITextEmbedder",
55+
"init_parameters": {
56+
"api_key": {"type": "env_var", "env_vars": ["OPENAI_API_KEY"], "strict": True},
57+
"model": "text-embedding-ada-002",
58+
"dimensions": None,
59+
"api_base_url": None,
60+
"organization": None,
61+
"prefix": "",
62+
"suffix": "",
63+
"timeout": None,
64+
"max_retries": None,
65+
"http_client_kwargs": None,
66+
},
6367
},
68+
"filters_bm25": None,
69+
"fuzziness": "AUTO",
70+
"top_k_bm25": 10,
71+
"scale_score": False,
72+
"filter_policy_bm25": "replace",
73+
"filters_embedding": None,
74+
"top_k_embedding": 10,
75+
"num_candidates": None,
76+
"filter_policy_embedding": "replace",
77+
"join_mode": "reciprocal_rank_fusion",
78+
"weights": None,
79+
"top_k": None,
80+
"sort_by_score": True,
6481
},
65-
"filters_bm25": None,
66-
"fuzziness": "AUTO",
67-
"top_k_bm25": 10,
68-
"scale_score": False,
69-
"filter_policy_bm25": "replace",
70-
"filters_embedding": None,
71-
"top_k_embedding": 10,
72-
"num_candidates": None,
73-
"filter_policy_embedding": "replace",
74-
"join_mode": "reciprocal_rank_fusion",
75-
"weights": None,
76-
"top_k": None,
77-
"sort_by_score": True,
78-
},
79-
}
82+
}
8083

8184
@pytest.fixture
8285
def mock_embedder(self):
8386
return MockedTextEmbedder()
8487

8588
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
86-
def test_to_dict(self, _mock_elasticsearch_client) -> None:
89+
def test_to_dict(self, _mock_elasticsearch_client, serialised) -> None:
8790
doc_store = ElasticsearchDocumentStore()
88-
embedder = SentenceTransformersTextEmbedder() # we use actual embedder here for the de/serialization
91+
embedder = OpenAITextEmbedder() # we use actual embedder here for the de/serialization
8992
hybrid_retriever = ElasticsearchHybridRetriever(document_store=doc_store, embedder=embedder)
9093
result = hybrid_retriever.to_dict()
9194

92-
result["init_parameters"]["embedder"]["init_parameters"].pop("device", None)
93-
94-
expected = deepcopy(self.serialised)
95-
# revision was added in Haystack 2.20.0; include it in expected if present in result
96-
if "revision" in result["init_parameters"]["embedder"]["init_parameters"]:
97-
expected["init_parameters"]["embedder"]["init_parameters"]["revision"] = None
98-
99-
assert result == expected
95+
assert result == serialised
10096

10197
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
102-
def test_from_dict(self, _mock_elasticsearch_client):
103-
data = deepcopy(self.serialised)
98+
def test_from_dict(self, _mock_elasticsearch_client, serialised):
99+
data = deepcopy(serialised)
104100
deserialized = ElasticsearchHybridRetriever.from_dict(data)
105101
assert isinstance(deserialized, ElasticsearchHybridRetriever)
106102
result = deserialized.to_dict()
107-
result["init_parameters"]["embedder"]["init_parameters"].pop("device", None)
108-
result["init_parameters"]["embedder"]["init_parameters"].pop("revision", None)
109-
assert result == self.serialised
103+
assert result == serialised
110104

111105
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
112106
def test_to_dict_with_extra_args(self, _mock_elasticsearch_client):
113107
doc_store = ElasticsearchDocumentStore()
114-
embedder = SentenceTransformersTextEmbedder()
108+
embedder = OpenAITextEmbedder()
115109
hybrid_retriever = ElasticsearchHybridRetriever(
116110
document_store=doc_store,
117111
embedder=embedder,
@@ -132,7 +126,7 @@ def test_to_dict_with_extra_args(self, _mock_elasticsearch_client):
132126
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
133127
def test_from_dict_with_extra_args(self, _mock_elasticsearch_client):
134128
doc_store = ElasticsearchDocumentStore()
135-
embedder = SentenceTransformersTextEmbedder()
129+
embedder = OpenAITextEmbedder()
136130
hybrid_retriever = ElasticsearchHybridRetriever(
137131
document_store=doc_store,
138132
embedder=embedder,
@@ -154,7 +148,7 @@ def test_from_dict_with_extra_args(self, _mock_elasticsearch_client):
154148
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
155149
def test_to_dict_with_enum_filter_policies(self, _mock_elasticsearch_client):
156150
doc_store = ElasticsearchDocumentStore()
157-
embedder = SentenceTransformersTextEmbedder()
151+
embedder = OpenAITextEmbedder()
158152
hybrid_retriever = ElasticsearchHybridRetriever(
159153
document_store=doc_store,
160154
embedder=embedder,
@@ -169,7 +163,7 @@ def test_to_dict_with_enum_filter_policies(self, _mock_elasticsearch_client):
169163
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
170164
def test_to_dict_with_enum_join_mode(self, _mock_elasticsearch_client):
171165
doc_store = ElasticsearchDocumentStore()
172-
embedder = SentenceTransformersTextEmbedder()
166+
embedder = OpenAITextEmbedder()
173167
hybrid_retriever = ElasticsearchHybridRetriever(
174168
document_store=doc_store,
175169
embedder=embedder,

0 commit comments

Comments
 (0)