Skip to content

Commit 4270e52

Browse files
docs(community): update docstrings for reference docs #1237 (#1255)
## Description Updated docstrings in `langchain-google-community` to be in accordance with the new reference docs site. - Updated docstrings in 22 files - Added cross-references to base classes - Added admonitions for installation, authentication, and security notes - Updated code snippets and examples ## Relevant issues #1237 ## Type 📖 Documentation --------- Co-authored-by: Mason Daugherty <[email protected]> Co-authored-by: Mason Daugherty <[email protected]>
1 parent 30486ab commit 4270e52

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+1469
-1286
lines changed

libs/community/langchain_google_community/_utils.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,7 @@ def get_user_agent(module: Optional[str] = None) -> Tuple[str, str]:
2727
r"""Returns a custom user agent header.
2828
2929
Args:
30-
module (Optional[str]):
31-
Optional. The module for a custom user agent header.
32-
Returns:
33-
Tuple[str, str]
30+
module: The module for a custom user agent header.
3431
"""
3532
try:
3633
langchain_version = metadata.version("langchain-google-community")
@@ -51,10 +48,7 @@ def get_client_info(module: Optional[str] = None) -> "ClientInfo":
5148
r"""Returns a client info object with a custom user agent header.
5249
5350
Args:
54-
module (Optional[str]):
55-
Optional. The module for a custom user agent header.
56-
Returns:
57-
google.api_core.gapic_v1.client_info.ClientInfo
51+
module: The module for a custom user agent header.
5852
"""
5953
client_library_version, user_agent = get_user_agent(module)
6054
return ClientInfo(
@@ -67,7 +61,7 @@ def import_google() -> Tuple[Request, Credentials, ServiceCredentials]:
6761
"""Import google libraries.
6862
6963
Returns:
70-
Tuple[Request, Credentials]: Request and Credentials classes.
64+
`Request` and `Credentials` classes.
7165
"""
7266
return (
7367
guard_import(
@@ -84,21 +78,21 @@ def import_google() -> Tuple[Request, Credentials, ServiceCredentials]:
8478

8579

8680
def import_installed_app_flow() -> InstalledAppFlow:
87-
"""Import InstalledAppFlow class.
81+
"""Import `InstalledAppFlow` class.
8882
8983
Returns:
90-
InstalledAppFlow: InstalledAppFlow class.
84+
`InstalledAppFlow` class.
9185
"""
9286
return guard_import(
9387
module_name="google_auth_oauthlib.flow", pip_name="google-auth-oauthlib"
9488
).InstalledAppFlow
9589

9690

9791
def import_googleapiclient_resource_builder() -> build_resource:
98-
"""Import googleapiclient.discovery.build function.
92+
"""Import `googleapiclient.discovery.build` function.
9993
10094
Returns:
101-
build_resource: googleapiclient.discovery.build function.
95+
`googleapiclient.discovery.build` function.
10296
"""
10397
return guard_import(
10498
module_name="googleapiclient.discovery", pip_name="google-api-python-client"

libs/community/langchain_google_community/bigquery.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,20 @@ def import_bigquery() -> Any:
2020

2121

2222
class BigQueryLoader(BaseLoader):
23-
"""Load from the Google Cloud Platform `BigQuery`.
23+
"""Load documents from Google Cloud BigQuery.
2424
25-
Each document represents one row of the result. The `page_content_columns`
26-
are written into the `page_content` of the document. The `metadata_columns`
27-
are written into the `metadata` of the document. By default, all columns
28-
are written into the `page_content` and none into the `metadata`.
25+
Inherits from [`BaseLoader`][langchain_core.document_loaders.BaseLoader].
2926
27+
Each row becomes a document. Columns can be mapped to `page_content` or
28+
`metadata`. By default, all columns map to `page_content`.
29+
30+
!!! note "Installation"
31+
32+
Requires additional dependencies:
33+
34+
```bash
35+
pip install langchain-google-community[bigquery]
36+
```
3037
"""
3138

3239
def __init__(
@@ -48,8 +55,8 @@ def __init__(
4855
document.
4956
credentials: Optional. Credentials for accessing Google APIs. Use this
5057
parameter to override default credentials, such as to use Compute Engine
51-
(``google.auth.compute_engine.Credentials``) or Service Account
52-
(``google.oauth2.service_account.Credentials``) credentials directly.
58+
(`google.auth.compute_engine.Credentials`) or Service Account
59+
(`google.oauth2.service_account.Credentials`) credentials directly.
5360
"""
5461
import_bigquery()
5562
self.query = query

libs/community/langchain_google_community/bq_storage_vectorstores/_base.py

Lines changed: 95 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -32,28 +32,11 @@
3232

3333

3434
class BaseBigQueryVectorStore(VectorStore, BaseModel, ABC):
35-
"""
36-
Abstract base class for BigQuery-based vector stores.
35+
"""Abstract base class for BigQuery-based vector stores.
3736
3837
This class provides a foundation for storing, retrieving, and searching documents
3938
and their corresponding embeddings in BigQuery.
4039
41-
Attributes:
42-
embedding: Embedding model for generating and comparing embeddings.
43-
project_id: Google Cloud Project ID where BigQuery resources are located.
44-
dataset_name: BigQuery dataset name.
45-
table_name: BigQuery table name.
46-
location: BigQuery region/location.
47-
content_field: Name of the column storing document content (default: "content").
48-
embedding_field: Name of the column storing text embeddings (default:
49-
"embedding").
50-
temp_dataset_name: Name of the BigQuery dataset to be used to upload temporary
51-
BQ tables. If None, will default to "{dataset_name}_temp".
52-
doc_id_field: Name of the column storing document IDs (default: "doc_id").
53-
credentials: Optional Google Cloud credentials object.
54-
embedding_dimension: Dimension of the embedding vectors (inferred if not
55-
provided).
56-
5740
Abstract Methods:
5841
sync_data: Synchronizes data between the vector store and BigQuery.
5942
get_documents: Retrieves documents based on IDs or filters.
@@ -62,21 +45,51 @@ class BaseBigQueryVectorStore(VectorStore, BaseModel, ABC):
6245
"""
6346

6447
model_config = ConfigDict(arbitrary_types_allowed=True)
48+
6549
embedding: Embeddings
50+
"""Embedding model for generating and comparing embeddings."""
51+
6652
project_id: str
53+
"""Google Cloud Project ID where BigQuery resources are located."""
54+
6755
dataset_name: str
56+
"""BigQuery dataset name."""
57+
6858
table_name: str
59+
"""BigQuery table name."""
60+
6961
location: str
62+
"""BigQuery region/location."""
63+
7064
content_field: str = "content"
65+
"""Name of the column storing document content."""
66+
7167
embedding_field: str = "embedding"
68+
"""Name of the column storing text embeddings."""
69+
7270
doc_id_field: str = "doc_id"
71+
"""Name of the column storing document IDs."""
72+
7373
temp_dataset_name: Optional[str] = None
74+
"""Name of the BigQuery dataset to be used to upload temporary BQ tables.
75+
76+
If `None`, will default to `'{dataset_name}_temp'`.
77+
"""
78+
7479
credentials: Optional[Any] = None
80+
"""Optional Google Cloud credentials object."""
81+
7582
embedding_dimension: Optional[int] = None
83+
"""Dimension of the embedding vectors (inferred if not provided)."""
84+
7685
extra_fields: Union[Dict[str, str], None] = None
86+
7787
table_schema: Any = None
88+
7889
_bq_client: Any = None
90+
7991
_logger: Any = None
92+
8093
_full_table_id: Optional[str] = None
8194

8295
@abstractmethod
@@ -249,8 +262,9 @@ def add_texts( # type: ignore[override]
249262
Args:
250263
texts: List of strings to add to the `VectorStore`.
251264
metadatas: Optional list of metadata records associated with the texts.
252-
(ie [{"url": "www.myurl1.com", "title": "title1"},
253-
{"url": "www.myurl2.com", "title": "title2"}])
265+
266+
(i.e. `[{"url": "www.myurl1.com", "title": "title1"},
267+
{"url": "www.myurl2.com", "title": "title2"}]`)
254268
255269
Returns:
256270
List of IDs from adding the texts into the `VectorStore`.
@@ -266,18 +280,19 @@ def add_texts_with_embeddings(
266280
embs: List[List[float]],
267281
metadatas: Optional[List[dict]] = None,
268282
) -> List[str]:
269-
"""Add precomputed embeddings and relative texts / metadatas to the `VectorStore`.
283+
"""Add precomputed embeddings & relative texts / metadatas to the `VectorStore`.
270284
271285
Args:
272286
ids: List of unique IDs in string format
273287
texts: List of strings to add to the `VectorStore`.
274288
embs: List of lists of floats with text embeddings for texts.
275289
metadatas: Optional list of metadata records associated with the texts.
276-
(ie `[{"url": "www.myurl1.com", "title": "title1"},
290+
291+
(i.e. `[{"url": "www.myurl1.com", "title": "title1"},
277292
{"url": "www.myurl2.com", "title": "title2"}]`)
278293
Returns:
279294
List of IDs from adding the texts into the `VectorStore`.
280-
""" # noqa: E501
295+
"""
281296
import pandas as pd
282297

283298
ids = [uuid.uuid4().hex for _ in texts]
@@ -360,21 +375,21 @@ def similarity_search_by_vectors(
360375
with_embeddings: bool = False,
361376
**kwargs: Any,
362377
) -> Any:
363-
"""Core similarity search function. Handles a list of embedding vectors,
364-
optionally returning scores and embeddings.
378+
"""Core similarity search function.
379+
380+
Handles a list of embedding vectors, optionally returning scores and embeddings.
365381
366382
Args:
367-
embeddings: A list of embedding vectors, where each vector is a list of
383+
embeddings: List of embedding vectors, where each vector is a list of
368384
floats.
369-
filter: (Optional) A dictionary specifying filtering criteria for the
370-
documents.
371-
Ie. {"title": "mytitle"}
372-
k: (Optional) The number of top-ranking similar documents to return per
373-
embedding. Defaults to 5.
374-
with_scores: (Optional) If True, include similarity scores in the result
375-
for each matched document. Defaults to False.
376-
with_embeddings: (Optional) If True, include the matched document's
377-
embedding vector in the result. Defaults to False.
385+
filter: Dictionary specifying filtering criteria for the documents.
386+
387+
i.e. `{"title": "mytitle"}`
388+
k: Number of top-ranking similar documents to return per embedding.
389+
with_scores: If `True`, include similarity scores in the result for each
390+
matched document.
391+
with_embeddings: If `True`, include the matched document's embedding vector
392+
in the result.
378393
Returns:
379394
A list of `k` documents for each embedding in `embeddings`
380395
"""
@@ -406,10 +421,11 @@ def similarity_search_by_vector(
406421
407422
Args:
408423
embedding: Embedding to look up documents similar to.
409-
filter: (Optional) A dictionary specifying filtering criteria for the
410-
documents. Ie. {"title": "mytitle"}
411-
k: (Optional) The number of top-ranking similar documents to return per
412-
embedding. Defaults to 5.
424+
filter: Dictionary specifying filtering criteria for the documents.
425+
426+
i.e. `{"title": "mytitle"}`
427+
k: Number of top-ranking similar documents to return per embedding.
428+
413429
Returns:
414430
Return docs most similar to embedding vector.
415431
"""
@@ -427,10 +443,11 @@ def similarity_search_by_vector_with_score(
427443
428444
Args:
429445
embedding: Embedding to look up documents similar to.
430-
filter: (Optional) A dictionary specifying filtering criteria for the
431-
documents. Ie. {"title": "mytitle"}
432-
k: (Optional) The number of top-ranking similar documents to return per
433-
embedding. Defaults to 5.
446+
filter: Dictionary specifying filtering criteria for the documents.
447+
448+
i.e. `{"title": "mytitle"}`
449+
k: The number of top-ranking similar documents to return per embedding.
450+
434451
Returns:
435452
Return docs most similar to embedding vector.
436453
"""
@@ -444,11 +461,12 @@ def similarity_search(
444461
"""Search for top `k` docs most similar to input query.
445462
446463
Args:
447-
query: search query to search documents with.
448-
filter: (Optional) A dictionary specifying filtering criteria for the
449-
documents. Ie. {"title": "mytitle"}
450-
k: (Optional) The number of top-ranking similar documents to return per
451-
embedding. Defaults to 5.
464+
query: Search query to search documents with.
465+
filter: Dictionary specifying filtering criteria for the documents.
466+
467+
i.e. `{"title": "mytitle"}`
468+
k: The number of top-ranking similar documents to return per embedding.
469+
452470
Returns:
453471
Return docs most similar to input query.
454472
"""
@@ -468,11 +486,12 @@ def similarity_search_with_score(
468486
scores.
469487
470488
Args:
471-
query: search query to search documents with.
472-
filter: (Optional) A dictionary specifying filtering criteria for the
473-
documents. Ie. {"title": "mytitle"}
474-
k: (Optional) The number of top-ranking similar documents to return per
475-
embedding. Defaults to 5.
489+
query: Search query to search documents with.
490+
filter: Dictionary specifying filtering criteria for the documents.
491+
492+
i.e. `{"title": "mytitle"}`
493+
k: The number of top-ranking similar documents to return per embedding.
494+
476495
Returns:
477496
Return docs most similar to input query along with scores.
478497
"""
@@ -506,20 +525,23 @@ def max_marginal_relevance_search(
506525
507526
Args:
508527
**kwargs:
509-
query: search query text.
528+
query: Search query text.
510529
filter: Filter on metadata properties, e.g.
511-
{
512-
"str_property": "foo",
513-
"int_property": 123
514-
}
515-
k: Number of Documents to return. Defaults to 5.
530+
531+
```json
532+
{
533+
"str_property": "foo",
534+
"int_property": 123
535+
}
536+
```
537+
k: Number of documents to return.
516538
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
517-
lambda_mult: Number between `0` and `1` that determines the degree
518-
of diversity among the results with 0 corresponding
519-
to maximum diversity and `1` to minimum diversity.
520-
Defaults to 0.5.
539+
lambda_mult: Number between `0` and `1` that determines the degree of
540+
diversity among the results with 0 corresponding to maximum diversity
541+
and `1` to minimum diversity.
542+
521543
Returns:
522-
List of Documents selected by maximal marginal relevance.
544+
List of documents selected by maximal marginal relevance.
523545
"""
524546
embedding = self.embedding.embed_query(query)
525547
return self.max_marginal_relevance_search_by_vector(
@@ -542,15 +564,19 @@ def max_marginal_relevance_search_by_vector(
542564
Args:
543565
embedding: Embedding to look up documents similar to.
544566
filter: Filter on metadata properties, e.g.
545-
{
546-
"str_property": "foo",
547-
"int_property": 123
548-
}
549-
k: Number of Documents to return.
567+
568+
```json
569+
{
570+
"str_property": "foo",
571+
"int_property": 123
572+
}
573+
```
574+
k: Number of documents to return.
550575
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
551576
lambda_mult: Number between `0` and `1` that determines the degree
552577
of diversity among the results with 0 corresponding
553578
to maximum diversity and `1` to minimum diversity.
579+
554580
Returns:
555581
List of Documents selected by maximal marginal relevance.
556582
"""

libs/community/langchain_google_community/bq_storage_vectorstores/bigquery.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,18 @@
2727

2828

2929
class BigQueryVectorStore(BaseBigQueryVectorStore):
30-
"""
31-
A vector store implementation that utilizes BigQuery and BigQuery Vector Search.
30+
"""Vector store implementation that utilizes BigQuery and BigQuery Vector Search.
3231
3332
This class provides efficient storage and retrieval of documents with vector
3433
embeddings within BigQuery. It is particularly well suited for prototyping, due to the
3534
serverless nature of BigQuery, and batch retrieval.
36-
It supports similarity search, filtering, and batch operations through
35+
36+
Supports similarity search, filtering, and batch operations through
3737
`batch_search` method.
38+
3839
Optionally, this class can leverage a Vertex AI Feature Store for online serving
3940
through the `to_vertex_fs_vector_store` method.
41+
4042
Note that the `bigquery.datasets.create` permission is required even if the
4143
dataset already exists. This can be avoided by specifying `temp_dataset_name` as
4244
the name of an existing dataset.

0 commit comments

Comments
 (0)