Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 42 additions & 41 deletions libs/astradb/langchain_astradb/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,20 +383,20 @@ def __init__(
Args:
embedding: the embeddings function or service to use.
This enables client-side embedding functions or calls to external
embedding providers. If `embedding` is provided, arguments
`collection_vector_service_options` and
`collection_embedding_api_key` cannot be provided.
embedding providers. If ``embedding`` is provided, arguments
``collection_vector_service_options`` and
``collection_embedding_api_key`` cannot be provided.
collection_name: name of the Astra DB collection to create/use.
token: API token for Astra DB usage, either in the form of a string
or a subclass of `astrapy.authentication.TokenProvider`.
or a subclass of ``astrapy.authentication.TokenProvider``.
If not provided, the environment variable
ASTRA_DB_APPLICATION_TOKEN is inspected.
api_endpoint: full URL to the API endpoint, such as
`https://<DB-ID>-us-east1.apps.astra.datastax.com`. If not provided,
``https://<DB-ID>-us-east1.apps.astra.datastax.com``. If not provided,
the environment variable ASTRA_DB_API_ENDPOINT is inspected.
environment: a string specifying the environment of the target Data API.
If omitted, defaults to "prod" (Astra DB production).
Other values are in `astrapy.constants.Environment` enum class.
Other values are in ``astrapy.constants.Environment`` enum class.
astra_db_client:
*DEPRECATED starting from version 0.3.5.*
*Please use 'token', 'api_endpoint' and optionally 'environment'.*
Expand Down Expand Up @@ -436,18 +436,18 @@ def __init__(
(see docs.datastax.com/en/astra/astra-db-vector/api-reference/
data-api-commands.html#advanced-feature-indexing-clause-on-createcollection)
collection_vector_service_options: specifies the use of server-side
embeddings within Astra DB. If passing this parameter, `embedding`
embeddings within Astra DB. If passing this parameter, ``embedding``
cannot be provided.
collection_embedding_api_key: for usage of server-side embeddings
within Astra DB. With this parameter one can supply an API Key
that will be passed to Astra DB with each data request.
This parameter can be either a string or a subclass of
`astrapy.authentication.EmbeddingHeadersProvider`.
``astrapy.authentication.EmbeddingHeadersProvider``.
This is useful when the service is configured for the collection,
but no corresponding secret is stored within
Astra's key management system.
This parameter cannot be provided without
specifying `collection_vector_service_options`.
specifying ``collection_vector_service_options``.
content_field: name of the field containing the textual content
in the documents when saved on Astra DB. For vectorize collections,
this cannot be specified; for non-vectorize collection, defaults
Expand All @@ -457,36 +457,36 @@ def __init__(
guessed by inspection of a few documents from the collection, under the
assumption that the longer strings are the most likely candidates.
Please understand the limitations of this method and get some
understanding of your data before passing `"*"` for this parameter.
understanding of your data before passing ``"*"`` for this parameter.
ignore_invalid_documents: if False (default), exceptions are raised
when a document is found on the Astra DB collection that does
not have the expected shape. If set to True, such results
from the database are ignored and a warning is issued. Note
that in this case a similarity search may end up returning fewer
results than the required `k`.
results than the required ``k``.
autodetect_collection: if True, turns on autodetect behavior.
The store will look for an existing collection of the provided name
and infer the store settings from it. Default is False.
In autodetect mode, `content_field` can be given as "*", meaning
that an attempt will be made to determine it by inspection
(unless vectorize is enabled, in which case `content_field` is ignored).
In autodetect mode, ``content_field`` can be given as ``"*"``, meaning
that an attempt will be made to determine it by inspection (unless
vectorize is enabled, in which case ``content_field`` is ignored).
In autodetect mode, the store not only determines whether embeddings
are client- or server-side, but - most importantly - switches
automatically between "nested" and "flat" representations of documents
on DB (i.e. having the metadata key-value pairs grouped in a `metadata`
field or spread at the documents' top-level). The former scheme
is the native mode of the AstraDBVectorStore; the store resorts
on DB (i.e. having the metadata key-value pairs grouped in a
``metadata`` field or spread at the documents' top-level). The former
scheme is the native mode of the AstraDBVectorStore; the store resorts
to the latter in case of vector collections populated with external
means (such as a third-party data import tool) before applying
an AstraDBVectorStore to them.
Note that the following parameters cannot be used if this is True:
`metric`, `setup_mode`, `metadata_indexing_include`,
`metadata_indexing_exclude`, `collection_indexing_policy`,
`collection_vector_service_options`.
``metric``, ``setup_mode``, ``metadata_indexing_include``,
``metadata_indexing_exclude``, ``collection_indexing_policy``,
``collection_vector_service_options``.

Note:
For concurrency in synchronous :meth:`~add_texts`:, as a rule of thumb, on a
typical client machine it is suggested to keep the quantity
For concurrency in synchronous :meth:`~add_texts`, as a rule of thumb,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here it should remain single tick 😉

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ouch! ahahah. Fixing again :)

on a typical client machine it is suggested to keep the quantity
bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency
much below 1000 to avoid exhausting the client multithreading/networking
resources. The hardcoded defaults are somewhat conservative to meet
Expand All @@ -499,7 +499,7 @@ def __init__(
depending on both the machine/network specs and the expected workload
(specifically, how often a write is an update of an existing id).
Remember you can pass concurrency settings to individual calls to
:meth:`~add_texts` and :meth:`~add_documents` as well.
:meth:`~add_texts` and :meth:`~add_documents` as well.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep single ticks

"""
# general collection settings
self.collection_name = collection_name
Expand Down Expand Up @@ -820,7 +820,7 @@ def delete_collection(self) -> None:
"""Completely delete the collection from the database.

Completely delete the collection from the database (as opposed
to :meth:`~clear`, which empties it only).
to :meth:`~clear`, which empties it only).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

single tick

Stored data is lost and unrecoverable, resources are freed.
Use with caution.
"""
Expand All @@ -831,7 +831,7 @@ async def adelete_collection(self) -> None:
"""Completely delete the collection from the database.

Completely delete the collection from the database (as opposed
to :meth:`~aclear`, which empties it only).
to :meth:`~aclear`, which empties it only).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

single tick

Stored data is lost and unrecoverable, resources are freed.
Use with caution.
"""
Expand Down Expand Up @@ -933,7 +933,7 @@ def add_texts(
Note:
There are constraints on the allowed field names
in the metadata dictionaries, coming from the underlying Astra DB API.
For instance, the `$` (dollar sign) cannot be used in the dict keys.
For instance, the ``$`` (dollar sign) cannot be used in the dict keys.
See this document for details:
https://docs.datastax.com/en/astra/astra-db-vector/api-reference/data-api.html

Expand Down Expand Up @@ -1055,7 +1055,7 @@ async def aadd_texts(
Note:
There are constraints on the allowed field names
in the metadata dictionaries, coming from the underlying Astra DB API.
For instance, the `$` (dollar sign) cannot be used in the dict keys.
For instance, the ``$`` (dollar sign) cannot be used in the dict keys.
See this document for details:
https://docs.datastax.com/en/astra/astra-db-vector/api-reference/data-api.html

Expand Down Expand Up @@ -1833,12 +1833,13 @@ def from_texts(
metadatas: metadata dicts for the texts.
ids: ids to associate to the texts.
**kwargs: you can pass any argument that you would
to :meth:`~add_texts` and/or to the 'AstraDBVectorStore' constructor
(see these methods for details). These arguments will be
to :meth:`~add_texts` and/or to the
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

single tick

``AstraDBVectorStore`` constructor (see these methods for
details). These arguments will be
routed to the respective methods as they are.

Returns:
an `AstraDBVectorStore` vectorstore.
an ``AstraDBVectorStore`` vectorstore.
"""
_add_texts_inspection = inspect.getfullargspec(AstraDBVectorStore.add_texts)
_method_args = (
Expand Down Expand Up @@ -1877,12 +1878,12 @@ async def afrom_texts(
metadatas: metadata dicts for the texts.
ids: ids to associate to the texts.
**kwargs: you can pass any argument that you would
to :meth:`~aadd_texts` and/or to the 'AstraDBVectorStore' constructor
(see these methods for details). These arguments will be
routed to the respective methods as they are.
to :meth:`~aadd_texts` and/or to the ``AstraDBVectorStore``
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single tick for aadd_texts

constructor (see these methods for details). These arguments
will be routed to the respective methods as they are.

Returns:
an `AstraDBVectorStore` vectorstore.
an ``AstraDBVectorStore`` vectorstore.
"""
_aadd_texts_inspection = inspect.getfullargspec(AstraDBVectorStore.aadd_texts)
_method_args = (
Expand Down Expand Up @@ -1913,13 +1914,13 @@ def from_documents(
) -> AstraDBVectorStore:
"""Create an Astra DB vectorstore from a document list.

Utility method that defers to 'from_texts' (see that one).
Utility method that defers to :meth:`~from_texts` (see that one).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: could use :meth: here for better reference.


Args: see 'from_texts', except here you have to supply 'documents'
in place of 'texts' and 'metadatas'.
Args: see ``from_texts``, except here you have to supply ``documents``
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to this PR: incorrect syntax for Args

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing this while I'm at it ...

in place of ``texts`` and ``metadatas``.

Returns:
an `AstraDBVectorStore` vectorstore.
an ``AstraDBVectorStore`` vectorstore.
"""
texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents]
Expand All @@ -1941,11 +1942,11 @@ async def afrom_documents(

Utility method that defers to :meth:`~afrom_texts` (see that one).

Args: see 'afrom_texts', except here you have to supply 'documents'
in place of 'texts' and 'metadatas'.
Args: see ``afrom_texts``, except here you have to supply ``documents``
in place of ``texts`` and ``metadatas``.

Returns:
an `AstraDBVectorStore` vectorstore.
an ``AstraDBVectorStore`` vectorstore.
"""
texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents]
Expand Down
2 changes: 1 addition & 1 deletion libs/astradb/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-astradb"
version = "0.3.5"
version = "0.4.0"
description = "An integration package connecting Astra DB and LangChain"
authors = []
readme = "README.md"
Expand Down
108 changes: 38 additions & 70 deletions libs/astradb/tests/unit_tests/test_vs_autodetect_inferences.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,79 +78,48 @@
({"$vector": [0], "_id": "a", "$vectorize": "a", "x": 9}, True),
]
DOC_FLATNESS_TEST_IDS = [f"DOC=<{json.dumps(doc)}>" for doc, _ in DOC_FLATNESS_PAIRS]

ff = FLAT_DOCUMENT
df = DEEP_DOCUMENT # noqa: PD901
uf = UNKNOWN_FLATNESS_DOCUMENT
DOCS_FLATNESS_PAIRS = [
([], False),
([uf], False),
([uf, uf], False),
([df], False),
([df, df], False),
([df, uf], False),
([ff], True),
([ff, ff], True),
([ff, uf], True),
([ff, df], ValueError()),
]
DOCS_FLATNESS_TEST_IDS = [
" docs=[] ",
" docs=[u] ",
" docs=[u, u] ",
" docs=[d] ",
" docs=[d, d] ",
" docs=[d, u] ",
" docs=[f] ",
" docs=[f, f] ",
" docs=[f, u] ",
" docs=[f, d] ",
]
DOC_CF_PAIRS = [
(DOCUMENT_WITH_CF_X, "x"),
(DOCUMENT_WITH_CF_Y, "y"),
(DOCUMENT_WITH_UNKNOWN_CF, None),
({"x": "LL", "_id": "a"}, "x"),
({"x": 1234, "_id": "a"}, None),
({"_id": "a"}, None),
DOCS_FLATNESS_TEST_PARAMS = [
pytest.param([], False, id=" docs=[] "),
pytest.param([uf], False, id=" docs=[u] "),
pytest.param([uf, uf], False, id=" docs=[u, u] "),
pytest.param([df], False, id=" docs=[d] "),
pytest.param([df, df], False, id=" docs=[d, d] "),
pytest.param([df, uf], False, id=" docs=[d, u] "),
pytest.param([ff], True, id=" docs=[f] "),
pytest.param([ff, ff], True, id=" docs=[f, f] "),
pytest.param([ff, uf], True, id=" docs=[f, u] "),
pytest.param([ff, df], ValueError(), id=" docs=[f, d] "),
]
DOC_CF_TEST_IDS = [
"cf=x",
"cf=y",
"unknown-cf",
"only-x",
"x-is-number",
"no-fields",

DOC_CF_TEST_PARAMS = [
pytest.param(DOCUMENT_WITH_CF_X, "x", id="cf=x"),
pytest.param(DOCUMENT_WITH_CF_Y, "y", id="cf=y"),
pytest.param(DOCUMENT_WITH_UNKNOWN_CF, None, id="unknown-cf"),
pytest.param({"x": "LL", "_id": "a"}, "x", id="only-x"),
pytest.param({"x": 1234, "_id": "a"}, None, id="x-is-number"),
pytest.param({"_id": "a"}, None, id="no-fields"),
]

xc = DOCUMENT_WITH_CF_X
yc = DOCUMENT_WITH_CF_Y
uc = DOCUMENT_WITH_UNKNOWN_CF
DOCS_CF_TRIPLES = [
([], "q", "q"),
([xc], "q", "q"),
([xc, xc, yc], "q", "q"),
([uc, uc], "q", "q"),
([xc, uc, uc], "q", "q"),
([xc, xc, yc, uc, uc, uc], "q", "q"),
([], "*", ValueError),
([xc], "*", "x"),
([xc, xc, yc], "*", "x"),
([uc, uc], "*", ValueError),
([xc, uc, uc], "*", "x"),
([xc, xc, yc, uc, uc, uc], "*", "x"),
]
DOCS_CF_TEST_IDS = [
"[]",
"[x]",
"[x, x, y]",
"[u, u]",
"[x, u, u]",
"[x, x, y, u, u, u]",
"[]",
"[x]",
"[x, x, y]",
"[u, u]",
"[x, u, u]",
"[x, x, y, u, u, u]",
DOCS_CF_TEST_PARAMS = [
pytest.param([], "q", "q", id=" [],req='q' "),
pytest.param([xc], "q", "q", id=" [x],req='q' "),
pytest.param([xc, xc, yc], "q", "q", id=" [x, x, y],req='q' "),
pytest.param([uc, uc], "q", "q", id=" [u, u],req='q' "),
pytest.param([xc, uc, uc], "q", "q", id=" [x, u, u],req='q' "),
pytest.param([xc, xc, yc, uc, uc, uc], "q", "q", id=" [x, x, y, u, u, u],req='q' "),
pytest.param([], "*", ValueError, id=" [],req='*' "),
pytest.param([xc], "*", "x", id=" [x],req='*' "),
pytest.param([xc, xc, yc], "*", "x", id=" [x, x, y],req='*' "),
pytest.param([uc, uc], "*", ValueError, id=" [u, u],req='*' "),
pytest.param([xc, uc, uc], "*", "x", id=" [x, u, u],req='*' "),
pytest.param([xc, xc, yc, uc, uc, uc], "*", "x", id=" [x, x, y, u, u, u],req='*' "),
]


Expand All @@ -168,8 +137,7 @@ def test_detect_document_flatness(

@pytest.mark.parametrize(
("documents", "expected_flatness"),
DOCS_FLATNESS_PAIRS,
ids=DOCS_FLATNESS_TEST_IDS,
DOCS_FLATNESS_TEST_PARAMS,
)
def test_detect_documents_flatness(
self,
Expand All @@ -184,7 +152,8 @@ def test_detect_documents_flatness(
_detect_documents_flatness(documents)

@pytest.mark.parametrize(
("document", "expected_content_field"), DOC_CF_PAIRS, ids=DOC_CF_TEST_IDS
("document", "expected_content_field"),
DOC_CF_TEST_PARAMS,
)
def test_detect_document_content_field(
self,
Expand All @@ -201,8 +170,7 @@ def test_detect_document_content_field(

@pytest.mark.parametrize(
("documents", "requested_content_field", "expected_content_field"),
DOCS_CF_TRIPLES,
ids=DOCS_CF_TEST_IDS,
DOCS_CF_TEST_PARAMS,
)
def test_detect_documents_content_field(
self,
Expand Down