Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion libs/astradb/codespell_ignore_words.txt
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
Haa
2 changes: 1 addition & 1 deletion libs/astradb/langchain_astradb/graph_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def mmr_traversal_search( # noqa: C901
def get_adjacent(tags: set[str]) -> Iterable[_Edge]:
targets: dict[str, _Edge] = {}

# TODO: Would be better parralelized
# TODO: Would be better parallelized
for tag in tags:
m_filter = (metadata_filter or {}).copy()
m_filter[self.link_from_metadata_key] = tag
Expand Down
2 changes: 1 addition & 1 deletion libs/astradb/langchain_astradb/utils/mmr_traversal.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def __init__(

# List of the candidates.
self.candidates = []
# ND array of the candidate embeddings.
# numpy n-dimensional array of the candidate embeddings.
self.candidate_embeddings = np.ndarray((0, self.dimensions), dtype=np.float32)

self.best_score = NEG_INF
Expand Down
11 changes: 4 additions & 7 deletions libs/astradb/langchain_astradb/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,6 @@ def __init__(
This is useful when the service is configured for the collection,
but no corresponding secret is stored within
Astra's key management system.
This parameter cannot be provided without
specifying ``collection_vector_service_options``.
content_field: name of the field containing the textual content
in the documents when saved on Astra DB. For vectorize collections,
this cannot be specified; for non-vectorize collection, defaults
Expand All @@ -473,7 +471,7 @@ def __init__(
Please understand the limitations of this method and get some
understanding of your data before passing ``"*"`` for this parameter.
ignore_invalid_documents: if False (default), exceptions are raised
when a document is found on the Astra DB collectin that does
when a document is found on the Astra DB collection that does
not have the expected shape. If set to True, such results
from the database are ignored and a warning is issued. Note
that in this case a similarity search may end up returning fewer
Expand Down Expand Up @@ -824,11 +822,10 @@ async def adelete(
raise ValueError(msg)

_max_workers = concurrency or self.bulk_delete_concurrency
return all(
await gather_with_concurrency(
_max_workers, *[self.adelete_by_document_id(doc_id) for doc_id in ids]
)
await gather_with_concurrency(
_max_workers, *[self.adelete_by_document_id(doc_id) for doc_id in ids]
)
return True

def delete_collection(self) -> None:
"""Completely delete the collection from the database.
Expand Down
37 changes: 24 additions & 13 deletions libs/astradb/testing.env.sample
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
export ASTRA_DB_APPLICATION_TOKEN="AstraCS:aaabbbccc..."
export ASTRA_DB_API_ENDPOINT="https://0123...-region.apps.astra.datastax.com"
export ASTRA_DB_KEYSPACE="default_keyspace"
# Optional (mostly for HCD and such):
# export ASTRA_DB_ENVIRONMENT="..."

# required to test vectorize with SHARED_SECRET. Comment on HCD and such.
export SHARED_SECRET_NAME_OPENAI="NAME_SUPPLIED_IN_ASTRA_KMS"
# required to test vectorize with HEADER
export OPENAI_API_KEY="sk-aaabbbccc..."

# change to "1" if nvidia server-side embeddings are available for the DB
export NVIDIA_VECTORIZE_AVAILABLE="0"
# ASTRA DB SETUP

ASTRA_DB_API_ENDPOINT=https://your_astra_db_id-your_region.apps.astra.datastax.com
ASTRA_DB_APPLICATION_TOKEN=AstraCS:your_astra_db_application_token
# ASTRA_DB_KEYSPACE=your_astra_db_keyspace
# ASTRA_DB_ENVIRONMENT="prod"

SHARED_SECRET_NAME_OPENAI="key_name_on_astra_kms"
OPENAI_API_KEY="..."


### When testing on HCD, SHARED_SECRET vectorize is not exercised; the settings look something like:
#
#
#
# ASTRA_DB_APPLICATION_TOKEN="Cassandra:Y2Fzc2FuZHJh:Y2Fzc2FuZHJh"
# ASTRA_DB_API_ENDPOINT="http://localhost:8181"
# ASTRA_DB_KEYSPACE="keyspace_created_by_the_ci_for_testing"
# ASTRA_DB_ENVIRONMENT="hcd"
#
# OPENAI_API_KEY="..."
#
#
#
57 changes: 32 additions & 25 deletions libs/astradb/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,14 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any

from langchain_core.embeddings import Embeddings
from langchain_core.language_models import LLM
from typing_extensions import override


class SomeEmbeddings(Embeddings):
    """Deterministic toy embedding model for tests.

    The mapping from text to vector is arbitrary; what matters is that the
    same text always yields the same vector of length ``dimension``.
    """

    def __init__(self, dimension: int) -> None:
        # Length of every vector produced by this instance.
        self.dimension = dimension

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed each text independently, preserving input order."""
        return list(map(self.embed_query, texts))

    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
        """Async wrapper that delegates to the synchronous implementation."""
        return self.embed_documents(texts)

    def embed_query(self, text: str) -> list[float]:
        """Return the normalized vector built from the code points of *text*.

        The first ``dimension`` characters are turned into their code points,
        followed by a single ``1`` marker and zero padding; the result is cut
        to ``dimension`` entries and divided by its Euclidean norm.
        """
        dim = self.dimension
        code_points = [ord(ch) for ch in text[:dim]]
        # Marker plus zero padding; a non-positive pad count adds nothing.
        padded = code_points + [1] + [0] * (dim - 1 - len(code_points))
        raw = padded[:dim]
        magnitude = sum(v * v for v in raw) ** 0.5
        return [v / magnitude for v in raw]

    async def aembed_query(self, text: str) -> list[float]:
        """Async wrapper that delegates to the synchronous implementation."""
        return self.embed_query(text)
if TYPE_CHECKING:
from langchain_core.callbacks import CallbackManagerForLLMRun


class ParserEmbeddings(Embeddings):
Expand Down Expand Up @@ -61,3 +41,30 @@ def embed_query(self, text: str) -> list[float]:

async def aembed_query(self, text: str) -> list[float]:
return self.embed_query(text)


class IdentityLLM(LLM):
    """Fake LLM for tests: echoes the prompt and counts invocations.

    With no ``stop`` sequences the prompt is returned unchanged; when ``stop``
    is supplied, the upper-cased prompt is returned wrapped in ``STOP<...>``.
    """

    # Incremented on every `_call` invocation.
    num_calls: int = 0

    @property
    @override
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation."""
        return "fake"

    @override
    def _call(
        self,
        prompt: str,
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> str:
        """Record the call and return the (possibly transformed) prompt."""
        self.num_calls += 1
        if stop is None:
            return prompt
        # Passing `stop` is made visible in the output.
        return f"STOP<{prompt.upper()}>"

    @property
    @override
    def _identifying_params(self) -> dict[str, Any]:
        """No identifying parameters: always an empty dict."""
        return {}
5 changes: 0 additions & 5 deletions libs/astradb/tests/integration_tests/.env.example

This file was deleted.

Loading
Loading