Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions libs/astradb/tests/integration_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,25 @@ def astra_db_env_vars_available() -> bool:

_load_env()

# Data API environment targeted by the integration tests; falls back to
# "prod" when ASTRA_DB_ENVIRONMENT is not set.
DATA_API_ENVIRONMENT = os.getenv("ASTRA_DB_ENVIRONMENT", "prod")
# Environment names (matched case-insensitively) that denote Astra DB.
_ASTRA_DB_ENVIRONMENTS = frozenset(("prod", "test", "dev"))
IS_ASTRA_DB = DATA_API_ENVIRONMENT.lower() in _ASTRA_DB_ENVIRONMENTS

# Database bug workaround flags.
# When a flag is True, tests guard the affected assertions with
# `if not <FLAG>:` so that they are not evaluated.
#
# 1. CNDB-13480 -- github.com/datastax/cassandra/pull/1653
#    Bug triggered by running an ANN search right after a delete.
#    Affects the hcd:1.2.1-early-preview image used for testing, hence the
#    flag is set whenever the target is not Astra DB.
#    Remove once a newer HCD image is used.
SKIP_CNDB_13480_TESTS = not IS_ASTRA_DB
# 2. CNDB-14524 -- https://github.com/riptano/cndb/issues/14524
#    With the sequence insert1, ANN, insert2, ANN some rows may not be seen.
#    Affects the Astra DB version deployed in prod, hence the flag is set
#    whenever the target is Astra DB.
#    Remove once a newer deploy takes place.
SKIP_CNDB_14524_TESTS = IS_ASTRA_DB

OPENAI_VECTORIZE_OPTIONS_HEADER = VectorServiceOptions(
provider="openai",
Expand Down Expand Up @@ -175,17 +194,13 @@ def astra_db_credentials() -> AstraDBCredentials:
"token": os.environ["ASTRA_DB_APPLICATION_TOKEN"],
"api_endpoint": os.environ["ASTRA_DB_API_ENDPOINT"],
"namespace": os.getenv("ASTRA_DB_KEYSPACE"),
"environment": os.getenv("ASTRA_DB_ENVIRONMENT", "prod"),
"environment": DATA_API_ENVIRONMENT,
}


@pytest.fixture(scope="session")
def is_astra_db(astra_db_credentials: AstraDBCredentials) -> bool:
return astra_db_credentials["environment"].lower() in {
"prod",
"test",
"dev",
}
def is_astra_db() -> bool:
return IS_ASTRA_DB


@pytest.fixture(scope="session")
Expand Down
13 changes: 9 additions & 4 deletions libs/astradb/tests/integration_tests/test_semantic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from langchain_astradb.utils.astradb import SetupMode

from .conftest import (
SKIP_CNDB_14524_TESTS,
AstraDBCredentials,
astra_db_env_vars_available,
)
Expand Down Expand Up @@ -155,13 +156,15 @@ def test_semantic_cache_through_llm_sync(
test_llm.generate(["[3,4]"])
test_llm.generate(["[3,4]"])
test_llm.generate(["[3,4]"])
assert test_llm.num_calls == 1
if not SKIP_CNDB_14524_TESTS:
assert test_llm.num_calls == 1

# clear the cache and check a new LLM call is actually made
astradb_semantic_cache.clear()
test_llm.generate(["[3,4]"])
test_llm.generate(["[3,4]"])
assert test_llm.num_calls == 2
if not SKIP_CNDB_14524_TESTS:
assert test_llm.num_calls == 2

async def test_semantic_cache_through_llm_async(
self,
Expand Down Expand Up @@ -191,10 +194,12 @@ async def test_semantic_cache_through_llm_async(
await test_llm.agenerate(["[3,4]"])
await test_llm.agenerate(["[3,4]"])
await test_llm.agenerate(["[3,4]"])
assert test_llm.num_calls == 1
if not SKIP_CNDB_14524_TESTS:
assert test_llm.num_calls == 1

# clear the cache and check a new LLM call is actually made
await astradb_semantic_cache.aclear()
await test_llm.agenerate(["[3,4]"])
await test_llm.agenerate(["[3,4]"])
assert test_llm.num_calls == 2
if not SKIP_CNDB_14524_TESTS:
assert test_llm.num_calls == 2
93 changes: 55 additions & 38 deletions libs/astradb/tests/integration_tests/test_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
EUCLIDEAN_MIN_SIM_UNIT_VECTORS,
MATCH_EPSILON,
OPENAI_VECTORIZE_OPTIONS_HEADER,
SKIP_CNDB_14524_TESTS,
astra_db_env_vars_available,
)

Expand Down Expand Up @@ -177,11 +178,12 @@ def test_astradb_vectorstore_from_texts_sync(
page_contents[3],
k=1,
)
assert len(search_results_triples_1) == 1
res_doc_1, _, res_id_1 = search_results_triples_1[0]
assert res_doc_1.page_content == page_contents[3]
assert res_doc_1.metadata == {"m": 7}
assert res_id_1 == "ft7"
if not SKIP_CNDB_14524_TESTS:
assert len(search_results_triples_1) == 1
res_doc_1, _, res_id_1 = search_results_triples_1[0]
assert res_doc_1.page_content == page_contents[3]
assert res_doc_1.metadata == {"m": 7}
assert res_id_1 == "ft7"
# routing of 'add_texts' keyword arguments
v_store_2 = AstraDBVectorStore.from_texts(
texts=page_contents[4:6],
Expand All @@ -205,11 +207,12 @@ def test_astradb_vectorstore_from_texts_sync(
page_contents[5],
k=1,
)
assert len(search_results_triples_2) == 1
res_doc_2, _, res_id_2 = search_results_triples_2[0]
assert res_doc_2.page_content == page_contents[5]
assert res_doc_2.metadata == {"m": 11}
assert res_id_2 == "ft11"
if not SKIP_CNDB_14524_TESTS:
assert len(search_results_triples_2) == 1
res_doc_2, _, res_id_2 = search_results_triples_2[0]
assert res_doc_2.page_content == page_contents[5]
assert res_doc_2.metadata == {"m": 11}
assert res_id_2 == "ft11"

@pytest.mark.parametrize(
("is_vectorize", "page_contents", "collection_fixture_name"),
Expand Down Expand Up @@ -434,9 +437,10 @@ async def test_astradb_vectorstore_from_texts_async(
)
assert len(search_results_triples_1) == 1
res_doc_1, _, res_id_1 = search_results_triples_1[0]
assert res_doc_1.page_content == page_contents[3]
assert res_doc_1.metadata == {"m": 7}
assert res_id_1 == "ft7"
if not SKIP_CNDB_14524_TESTS:
assert res_doc_1.page_content == page_contents[3]
assert res_doc_1.metadata == {"m": 7}
assert res_id_1 == "ft7"
# routing of 'add_texts' keyword arguments
v_store_2 = await AstraDBVectorStore.afrom_texts(
texts=page_contents[4:6],
Expand All @@ -460,11 +464,12 @@ async def test_astradb_vectorstore_from_texts_async(
page_contents[5],
k=1,
)
assert len(search_results_triples_2) == 1
res_doc_2, _, res_id_2 = search_results_triples_2[0]
assert res_doc_2.page_content == page_contents[5]
assert res_doc_2.metadata == {"m": 11}
assert res_id_2 == "ft11"
if not SKIP_CNDB_14524_TESTS:
assert len(search_results_triples_2) == 1
res_doc_2, _, res_id_2 = search_results_triples_2[0]
assert res_doc_2.page_content == page_contents[5]
assert res_doc_2.metadata == {"m": 11}
assert res_id_2 == "ft11"

@pytest.mark.parametrize(
("is_vectorize", "page_contents", "collection_fixture_name"),
Expand Down Expand Up @@ -644,21 +649,24 @@ def test_astradb_vectorstore_crud_sync(
# not requiring ordered match (elsewhere it may be overwriting some)
assert set(added_ids_1) == {"c", "d"}
res2 = vstore.similarity_search("[-1,-1]", k=10)
assert len(res2) == 4
if not SKIP_CNDB_14524_TESTS:
assert len(res2) == 4
# pick one that was just updated and check its metadata
res3 = vstore.similarity_search_with_score_id(
query="[5,6]", k=1, filter={"k": "c_new"}
)
doc3, _, id3 = res3[0]
assert doc3.page_content == "[5,6]"
assert doc3.metadata == {"k": "c_new", "ord": 102}
assert id3 == "c"
if not SKIP_CNDB_14524_TESTS:
doc3, _, id3 = res3[0]
assert doc3.page_content == "[5,6]"
assert doc3.metadata == {"k": "c_new", "ord": 102}
assert id3 == "c"
# delete and count again
del1_res = vstore.delete(["b"])
assert del1_res is True
del2_res = vstore.delete(["a", "c", "Z!"])
assert del2_res is True # a non-existing ID was supplied
assert len(vstore.similarity_search("[-1,-1]", k=10)) == 1
if not SKIP_CNDB_14524_TESTS:
assert len(vstore.similarity_search("[-1,-1]", k=10)) == 1
# clear store
vstore.clear()
assert vstore.similarity_search("[-1,-1]", k=2) == []
Expand All @@ -680,13 +688,16 @@ def test_astradb_vectorstore_crud_sync(
metadatas=[{"k": "r", "ord": 306}, {"k": "s", "ord": 307}],
ids=["r", "s"],
)
assert len(vstore.similarity_search("[-1,-1]", k=10)) == 4
if not SKIP_CNDB_14524_TESTS:
assert len(vstore.similarity_search("[-1,-1]", k=10)) == 4
res4 = vstore.similarity_search("[-1,-1]", k=1, filter={"k": "s"})
assert res4[0].metadata["ord"] == 307
assert res4[0].id == "s"
if not SKIP_CNDB_14524_TESTS:
assert res4[0].metadata["ord"] == 307
assert res4[0].id == "s"
# delete_by_document_id
vstore.delete_by_document_id("s")
assert len(vstore.similarity_search("[-1,-1]", k=10)) == 3
if not SKIP_CNDB_14524_TESTS:
assert len(vstore.similarity_search("[-1,-1]", k=10)) == 3

@pytest.mark.parametrize(
"vector_store",
Expand Down Expand Up @@ -739,21 +750,24 @@ async def test_astradb_vectorstore_crud_async(
# not requiring ordered match (elsewhere it may be overwriting some)
assert set(added_ids_1) == {"c", "d"}
res2 = await vstore.asimilarity_search("[-1,-1]", k=10)
assert len(res2) == 4
if not SKIP_CNDB_14524_TESTS:
assert len(res2) == 4
# pick one that was just updated and check its metadata
res3 = await vstore.asimilarity_search_with_score_id(
query="[5,6]", k=1, filter={"k": "c_new"}
)
doc3, _, id3 = res3[0]
assert doc3.page_content == "[5,6]"
assert doc3.metadata == {"k": "c_new", "ord": 102}
assert id3 == "c"
if not SKIP_CNDB_14524_TESTS:
doc3, _, id3 = res3[0]
assert doc3.page_content == "[5,6]"
assert doc3.metadata == {"k": "c_new", "ord": 102}
assert id3 == "c"
# delete and count again
del1_res = await vstore.adelete(["b"])
assert del1_res is True
del2_res = await vstore.adelete(["a", "c", "Z!"])
assert del2_res is True # a non-existing ID was supplied
assert len(await vstore.asimilarity_search("[-1,-1]", k=10)) == 1
if not SKIP_CNDB_14524_TESTS:
assert len(await vstore.asimilarity_search("[-1,-1]", k=10)) == 1
# clear store
await vstore.aclear()
assert await vstore.asimilarity_search("[-1,-1]", k=2) == []
Expand All @@ -775,13 +789,16 @@ async def test_astradb_vectorstore_crud_async(
metadatas=[{"k": "r", "ord": 306}, {"k": "s", "ord": 307}],
ids=["r", "s"],
)
assert len(await vstore.asimilarity_search("[-1,-1]", k=10)) == 4
if not SKIP_CNDB_14524_TESTS:
assert len(await vstore.asimilarity_search("[-1,-1]", k=10)) == 4
res4 = await vstore.asimilarity_search("[-1,-1]", k=1, filter={"k": "s"})
assert res4[0].metadata["ord"] == 307
assert res4[0].id == "s"
if not SKIP_CNDB_14524_TESTS:
assert res4[0].metadata["ord"] == 307
assert res4[0].id == "s"
# delete_by_document_id
await vstore.adelete_by_document_id("s")
assert len(await vstore.asimilarity_search("[-1,-1]", k=10)) == 3
if not SKIP_CNDB_14524_TESTS:
assert len(await vstore.asimilarity_search("[-1,-1]", k=10)) == 3

def test_astradb_vectorstore_massive_insert_replace_sync(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any, Iterable

import pytest
Expand All @@ -31,6 +30,7 @@
LEXICAL_OPTIONS,
NVIDIA_RERANKING_OPTIONS_HEADER,
OPENAI_VECTORIZE_OPTIONS_HEADER,
SKIP_CNDB_13480_TESTS,
astra_db_env_vars_available,
)

Expand Down Expand Up @@ -240,9 +240,7 @@ def test_autodetect_flat_novectorize_crud(
del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
assert del_by_md is not None
assert del_by_md == 1
# TODO: Remove this flag once `github.com/datastax/cassandra/pull/1653`
# makes it to the testing HCD
if "LANGCHAIN_TEST_NO_CNDB13480" not in os.environ:
if not SKIP_CNDB_13480_TESTS:
results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
assert results2n == []

Expand Down Expand Up @@ -330,9 +328,7 @@ def test_autodetect_default_novectorize_crud(
del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
assert del_by_md is not None
assert del_by_md == 1
# TODO: Remove this flag once `github.com/datastax/cassandra/pull/1653`
# makes it to the testing HCD
if "LANGCHAIN_TEST_NO_CNDB13480" not in os.environ:
if not SKIP_CNDB_13480_TESTS:
results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
assert results2n == []

Expand Down Expand Up @@ -424,9 +420,7 @@ def test_autodetect_flat_vectorize_crud(
del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
assert del_by_md is not None
assert del_by_md == 1
# TODO: Remove this flag once `github.com/datastax/cassandra/pull/1653`
# makes it to the testing HCD
if "LANGCHAIN_TEST_NO_CNDB13480" not in os.environ:
if not SKIP_CNDB_13480_TESTS:
results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
assert results2n == []

Expand Down Expand Up @@ -516,9 +510,7 @@ def test_autodetect_default_vectorize_crud(
del_by_md = ad_store.delete_by_metadata_filter(filter={"q2": "Q2"})
assert del_by_md is not None
assert del_by_md == 1
# TODO: Remove this flag once `github.com/datastax/cassandra/pull/1653`
# makes it to the testing HCD
if "LANGCHAIN_TEST_NO_CNDB13480" not in os.environ:
if not SKIP_CNDB_13480_TESTS:
results2n = ad_store.similarity_search("[-1,-1]", k=3, filter={"q2": "Q2"})
assert results2n == []

Expand Down