langchain-ai
diff --git a/libs/community/langchain_google_community/_utils.py b/libs/community/langchain_google_community/_utils.py
Lines changed: 7 additions & 13 deletions
diff --git a/libs/community/langchain_google_community/bigquery.py b/libs/community/langchain_google_community/bigquery.py
Lines changed: 14 additions & 7 deletions
diff --git a/libs/community/langchain_google_community/bq_storage_vectorstores/_base.py b/libs/community/langchain_google_community/bq_storage_vectorstores/_base.py
Lines changed: 95 additions & 69 deletions
diff --git a/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py b/libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py
Lines changed: 5 additions & 3 deletions
@@ -27,10 +27,7 @@ def get_user_agent(module: Optional[str] = None) -> Tuple[str, str]:
     r"""Returns a custom user agent header.
 
     Args:
-        module (Optional[str]):
-            Optional. The module for a custom user agent header.
-    Returns:
-        Tuple[str, str]
+        module: The module for a custom user agent header.
     """
     try:
         langchain_version = metadata.version("langchain-google-community")
@@ -51,10 +48,7 @@ def get_client_info(module: Optional[str] = None) -> "ClientInfo":
     r"""Returns a client info object with a custom user agent header.
 
     Args:
-        module (Optional[str]):
-            Optional. The module for a custom user agent header.
-    Returns:
-        google.api_core.gapic_v1.client_info.ClientInfo
+        module: The module for a custom user agent header.
     """
     client_library_version, user_agent = get_user_agent(module)
     return ClientInfo(
@@ -67,7 +61,7 @@ def import_google() -> Tuple[Request, Credentials, ServiceCredentials]:
     """Import google libraries.
 
     Returns:
-        Tuple[Request, Credentials]: Request and Credentials classes.
+        `Request` and `Credentials` classes.
     """
     return (
         guard_import(
@@ -84,21 +78,21 @@ def import_google() -> Tuple[Request, Credentials, ServiceCredentials]:
 
 
 def import_installed_app_flow() -> InstalledAppFlow:
-    """Import InstalledAppFlow class.
+    """Import `InstalledAppFlow` class.
 
     Returns:
-        InstalledAppFlow: InstalledAppFlow class.
+        `InstalledAppFlow` class.
     """
     return guard_import(
         module_name="google_auth_oauthlib.flow", pip_name="google-auth-oauthlib"
     ).InstalledAppFlow
 
 
 def import_googleapiclient_resource_builder() -> build_resource:
-    """Import googleapiclient.discovery.build function.
+    """Import `googleapiclient.discovery.build` function.
 
     Returns:
-        build_resource: googleapiclient.discovery.build function.
+        `googleapiclient.discovery.build` function.
     """
     return guard_import(
         module_name="googleapiclient.discovery", pip_name="google-api-python-client"
 
@@ -20,13 +20,20 @@ def import_bigquery() -> Any:
 
 
 class BigQueryLoader(BaseLoader):
-    """Load from the Google Cloud Platform `BigQuery`.
+    """Load documents from Google Cloud BigQuery.
 
-    Each document represents one row of the result. The `page_content_columns`
-    are written into the `page_content` of the document. The `metadata_columns`
-    are written into the `metadata` of the document. By default, all columns
-    are written into the `page_content` and none into the `metadata`.
+    Inherits from [`BaseLoader`][langchain_core.document_loaders.BaseLoader].
 
+    Each row becomes a document. Columns can be mapped to `page_content` or
+    `metadata`. By default, all columns map to `page_content`.
+
+    !!! note "Installation"
+
+        Requires additional dependencies:
+
+        ```bash
+        pip install langchain-google-community[bigquery]
+        ```
     """
 
     def __init__(
@@ -48,8 +55,8 @@ def __init__(
                 document.
             credentials: Optional. Credentials for accessing Google APIs. Use this
                 parameter to override default credentials, such as to use Compute Engine
-                (``google.auth.compute_engine.Credentials``) or Service Account
-                (``google.oauth2.service_account.Credentials``) credentials directly.
+                (`google.auth.compute_engine.Credentials`) or Service Account
+                (`google.oauth2.service_account.Credentials`) credentials directly.
         """
         import_bigquery()
         self.query = query
 
@@ -32,28 +32,11 @@
 
 
 class BaseBigQueryVectorStore(VectorStore, BaseModel, ABC):
-    """
-    Abstract base class for BigQuery-based vector stores.
+    """Abstract base class for BigQuery-based vector stores.
 
     This class provides a foundation for storing, retrieving, and searching documents
     and their corresponding embeddings in BigQuery.
 
-    Attributes:
-        embedding: Embedding model for generating and comparing embeddings.
-        project_id: Google Cloud Project ID where BigQuery resources are located.
-        dataset_name: BigQuery dataset name.
-        table_name: BigQuery table name.
-        location: BigQuery region/location.
-        content_field: Name of the column storing document content (default: "content").
-        embedding_field: Name of the column storing text embeddings (default:
-            "embedding").
-        temp_dataset_name: Name of the BigQuery dataset to be used to upload temporary
-            BQ tables. If None, will default to "{dataset_name}_temp".
-        doc_id_field: Name of the column storing document IDs (default: "doc_id").
-        credentials: Optional Google Cloud credentials object.
-        embedding_dimension: Dimension of the embedding vectors (inferred if not
-            provided).
-
     Abstract Methods:
         sync_data: Synchronizes data between the vector store and BigQuery.
         get_documents: Retrieves documents based on IDs or filters.
@@ -62,21 +45,51 @@ class BaseBigQueryVectorStore(VectorStore, BaseModel, ABC):
     """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
+
     embedding: Embeddings
+    """Embedding model for generating and comparing embeddings."""
+
     project_id: str
+    """Google Cloud Project ID where BigQuery resources are located."""
+
     dataset_name: str
+    """BigQuery dataset name."""
+
     table_name: str
+    """BigQuery table name."""
+
     location: str
+    """BigQuery region/location."""
+
     content_field: str = "content"
+    """Name of the column storing document content."""
+
     embedding_field: str = "embedding"
+    """Name of the column storing text embeddings."""
+
     doc_id_field: str = "doc_id"
+    """Name of the column storing document IDs."""
+
     temp_dataset_name: Optional[str] = None
+    """Name of the BigQuery dataset to be used to upload temporary BQ tables.
+
+    If `None`, will default to `'{dataset_name}_temp'`.
+    """
+
     credentials: Optional[Any] = None
+    """Optional Google Cloud credentials object."""
+
     embedding_dimension: Optional[int] = None
+    """Dimension of the embedding vectors (inferred if not provided)."""
+
     extra_fields: Union[Dict[str, str], None] = None
+
     table_schema: Any = None
+
     _bq_client: Any = None
+
     _logger: Any = None
+
     _full_table_id: Optional[str] = None
 
     @abstractmethod
@@ -249,8 +262,9 @@ def add_texts(  # type: ignore[override]
         Args:
             texts: List of strings to add to the `VectorStore`.
             metadatas: Optional list of metadata records associated with the texts.
-                (ie [{"url": "www.myurl1.com", "title": "title1"},
-                {"url": "www.myurl2.com", "title": "title2"}])
+
+                (e.g. `[{"url": "www.myurl1.com", "title": "title1"},
+                {"url": "www.myurl2.com", "title": "title2"}]`)
 
         Returns:
             List of IDs from adding the texts into the `VectorStore`.
@@ -266,18 +280,19 @@ def add_texts_with_embeddings(
         embs: List[List[float]],
         metadatas: Optional[List[dict]] = None,
     ) -> List[str]:
-        """Add precomputed embeddings and relative texts / metadatas to the `VectorStore`.
+        """Add precomputed embeddings & related texts / metadatas to the `VectorStore`.
 
         Args:
             ids: List of unique IDs in string format
             texts: List of strings to add to the `VectorStore`.
             embs: List of lists of floats with text embeddings for texts.
             metadatas: Optional list of metadata records associated with the texts.
-                (ie `[{"url": "www.myurl1.com", "title": "title1"},
+
+                (e.g. `[{"url": "www.myurl1.com", "title": "title1"},
                 {"url": "www.myurl2.com", "title": "title2"}]`)
         Returns:
             List of IDs from adding the texts into the `VectorStore`.
-        """  # noqa: E501
+        """
         import pandas as pd
 
         ids = [uuid.uuid4().hex for _ in texts]
@@ -360,21 +375,21 @@ def similarity_search_by_vectors(
         with_embeddings: bool = False,
         **kwargs: Any,
     ) -> Any:
-        """Core similarity search function. Handles a list of embedding vectors,
-        optionally returning scores and embeddings.
+        """Core similarity search function.
+
+        Handles a list of embedding vectors, optionally returning scores and embeddings.
 
         Args:
-            embeddings: A list of embedding vectors, where each vector is a list of
+            embeddings: List of embedding vectors, where each vector is a list of
                 floats.
-            filter: (Optional) A dictionary specifying filtering criteria for the
-                documents.
-                Ie. {"title": "mytitle"}
-            k: (Optional) The number of top-ranking similar documents to return per
-                embedding. Defaults to 5.
-            with_scores: (Optional) If True, include similarity scores in the result
-                for each matched document. Defaults to False.
-            with_embeddings: (Optional) If True, include the matched document's
-                embedding vector in the result. Defaults to False.
+            filter: Dictionary specifying filtering criteria for the documents.
+
+                e.g. `{"title": "mytitle"}`
+            k: Number of top-ranking similar documents to return per embedding.
+            with_scores: If `True`, include similarity scores in the result for each
+                matched document.
+            with_embeddings: If `True`, include the matched document's embedding vector
+                in the result.
         Returns:
             A list of `k` documents for each embedding in `embeddings`
         """
@@ -406,10 +421,11 @@ def similarity_search_by_vector(
 
         Args:
             embedding: Embedding to look up documents similar to.
-            filter: (Optional) A dictionary specifying filtering criteria for the
-                documents. Ie. {"title": "mytitle"}
-            k: (Optional) The number of top-ranking similar documents to return per
-                embedding. Defaults to 5.
+            filter: Dictionary specifying filtering criteria for the documents.
+
+                e.g. `{"title": "mytitle"}`
+            k: Number of top-ranking similar documents to return per embedding.
+
         Returns:
             Return docs most similar to embedding vector.
         """
@@ -427,10 +443,11 @@ def similarity_search_by_vector_with_score(
 
         Args:
             embedding: Embedding to look up documents similar to.
-            filter: (Optional) A dictionary specifying filtering criteria for the
-                documents. Ie. {"title": "mytitle"}
-            k: (Optional) The number of top-ranking similar documents to return per
-                embedding. Defaults to 5.
+            filter: Dictionary specifying filtering criteria for the documents.
+
+                e.g. `{"title": "mytitle"}`
+            k: Number of top-ranking similar documents to return per embedding.
+
         Returns:
             Return docs most similar to embedding vector.
         """
@@ -444,11 +461,12 @@ def similarity_search(
         """Search for top `k` docs most similar to input query.
 
         Args:
-            query: search query to search documents with.
-            filter: (Optional) A dictionary specifying filtering criteria for the
-                documents. Ie. {"title": "mytitle"}
-            k: (Optional) The number of top-ranking similar documents to return per
-                embedding. Defaults to 5.
+            query: Search query to search documents with.
+            filter: Dictionary specifying filtering criteria for the documents.
+
+                e.g. `{"title": "mytitle"}`
+            k: Number of top-ranking similar documents to return per embedding.
+
         Returns:
             Return docs most similar to input query.
         """
@@ -468,11 +486,12 @@ def similarity_search_with_score(
         scores.
 
         Args:
-            query: search query to search documents with.
-            filter: (Optional) A dictionary specifying filtering criteria for the
-                documents. Ie. {"title": "mytitle"}
-            k: (Optional) The number of top-ranking similar documents to return per
-                embedding. Defaults to 5.
+            query: Search query to search documents with.
+            filter: Dictionary specifying filtering criteria for the documents.
+
+                e.g. `{"title": "mytitle"}`
+            k: Number of top-ranking similar documents to return per embedding.
+
         Returns:
             Return docs most similar to input query along with scores.
         """
@@ -506,20 +525,23 @@ def max_marginal_relevance_search(
 
         Args:
             **kwargs:
-            query: search query text.
+            query: Search query text.
             filter: Filter on metadata properties, e.g.
-                            {
-                                "str_property": "foo",
-                                "int_property": 123
-                            }
-            k: Number of Documents to return. Defaults to 5.
+
+                ```json
+                {
+                    "str_property": "foo",
+                    "int_property": 123
+                }
+                ```
+            k: Number of documents to return.
             fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
-            lambda_mult: Number between `0` and `1` that determines the degree
-                        of diversity among the results with 0 corresponding
-                        to maximum diversity and `1` to minimum diversity.
-                        Defaults to 0.5.
+            lambda_mult: Number between `0` and `1` that determines the degree of
+                diversity among the results with 0 corresponding to maximum diversity
+                and `1` to minimum diversity.
+
         Returns:
-            List of Documents selected by maximal marginal relevance.
+            List of documents selected by maximal marginal relevance.
         """
         embedding = self.embedding.embed_query(query)
         return self.max_marginal_relevance_search_by_vector(
@@ -542,15 +564,19 @@ def max_marginal_relevance_search_by_vector(
         Args:
             embedding: Embedding to look up documents similar to.
             filter: Filter on metadata properties, e.g.
-                            {
-                                "str_property": "foo",
-                                "int_property": 123
-                            }
-            k: Number of Documents to return.
+
+                ```json
+                {
+                    "str_property": "foo",
+                    "int_property": 123
+                }
+                ```
+            k: Number of documents to return.
             fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
             lambda_mult: Number between `0` and `1` that determines the degree
                 of diversity among the results with 0 corresponding
                 to maximum diversity and `1` to minimum diversity.
+
         Returns:
             List of documents selected by maximal marginal relevance.
         """
 
@@ -27,16 +27,18 @@
 
 
 class BigQueryVectorStore(BaseBigQueryVectorStore):
-    """
-    A vector store implementation that utilizes BigQuery and BigQuery Vector Search.
+    """Vector store implementation that utilizes BigQuery and BigQuery Vector Search.
 
     This class provides efficient storage and retrieval of documents with vector
     embeddings within BigQuery. It is particularly well suited to prototyping, due
     to the serverless nature of BigQuery, and to batch retrieval.
-    It supports similarity search, filtering, and batch operations through
+
+    Supports similarity search, filtering, and batch operations through
     `batch_search` method.
+
     Optionally, this class can leverage a Vertex AI Feature Store for online serving
     through the `to_vertex_fs_vector_store` method.
+
     Note that the `bigquery.datasets.create` permission is required even if the
     dataset already exists. This can be avoided by specifying `temp_dataset_name` as
     the name of an existing dataset.