Skip to content

Commit f36197e

Browse files
authored
[DOC] Document Search related types and methods (#5700)
## Description of changes

_Summarize the changes made by this PR._

- Improvements & Bug fixes
  - N/A
- New functionality
  - Derived docstrings for `Search` related Rust types

## Test plan

_How are these changes tested?_

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Migration plan

_Are there any migrations, or any forwards/backwards compatibility changes needed in order to make sure this change deploys reliably?_

## Observability plan

_What is the plan to instrument and monitor this change?_

## Documentation Changes

_Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 61639bb commit f36197e

File tree

6 files changed

+1387
-104
lines changed

6 files changed

+1387
-104
lines changed

clients/new-js/packages/chromadb/src/api/types.gen.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,72 @@ export type IntValueType = {
244244
int_inverted_index?: null | IntInvertedIndexType;
245245
};
246246

247+
/**
248+
* Represents a field key in search queries.
249+
*
250+
* Used for both selecting fields to return and building filter expressions.
251+
* Predefined keys access special fields, while custom keys access metadata.
252+
*
253+
* # Predefined Keys
254+
*
255+
* - `Key::Document` - Document text content (`#document`)
256+
* - `Key::Embedding` - Vector embeddings (`#embedding`)
257+
* - `Key::Metadata` - All metadata fields (`#metadata`)
258+
* - `Key::Score` - Search scores (`#score`)
259+
*
260+
* # Custom Keys
261+
*
262+
* Use `Key::field()` or `Key::from()` to reference metadata fields:
263+
*
264+
* ```
265+
* use chroma_types::operator::Key;
266+
*
267+
* let key = Key::field("author");
268+
* let key = Key::from("title");
269+
* ```
270+
*
271+
* # Examples
272+
*
273+
* ## Building filters
274+
*
275+
* ```
276+
* use chroma_types::operator::Key;
277+
*
278+
* // Equality
279+
* let filter = Key::field("status").eq("published");
280+
*
281+
* // Comparisons
282+
* let filter = Key::field("year").gte(2020);
283+
* let filter = Key::field("score").lt(0.9);
284+
*
285+
* // Set operations
286+
* let filter = Key::field("category").is_in(vec!["tech", "science"]);
287+
* let filter = Key::field("status").not_in(vec!["deleted", "archived"]);
288+
*
289+
* // Document content
290+
* let filter = Key::Document.contains("machine learning");
291+
* let filter = Key::Document.regex(r"\bAPI\b");
292+
*
293+
* // Combining filters
294+
* let filter = Key::field("status").eq("published")
295+
* & Key::field("year").gte(2020);
296+
* ```
297+
*
298+
* ## Selecting fields
299+
*
300+
* ```
301+
* use chroma_types::plan::SearchPayload;
302+
* use chroma_types::operator::Key;
303+
*
304+
* let search = SearchPayload::default()
305+
* .select([
306+
* Key::Document,
307+
* Key::Score,
308+
* Key::field("title"),
309+
* Key::field("author"),
310+
* ]);
311+
* ```
312+
*/
247313
export type Key = 'Document' | 'Embedding' | 'Metadata' | 'Score' | {
248314
MetadataField: string;
249315
};

rust/chroma/src/client/chroma_http_client.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,10 @@ static METRICS: std::sync::LazyLock<crate::client::metrics::Metrics> =
9191
/// # Examples
9292
///
9393
/// ```
94-
/// use chroma::{ChromaHttpClient, client::ChromaClientOptions, client::ChromaAuthMethod};
94+
/// use chroma::{ChromaHttpClient, client::ChromaHttpClientOptions, client::ChromaAuthMethod};
9595
///
9696
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
97-
/// let options = ChromaClientOptions {
97+
/// let options = ChromaHttpClientOptions {
9898
/// endpoint: "https://api.trychroma.com".parse()?,
9999
/// auth_method: ChromaAuthMethod::cloud_api_key("my-key")?,
100100
/// ..Default::default()
@@ -158,10 +158,10 @@ impl ChromaHttpClient {
158158
/// # Examples
159159
///
160160
/// ```
161-
/// use chroma::{ChromaHttpClient, client::ChromaClientOptions, client::ChromaAuthMethod};
161+
/// use chroma::{ChromaHttpClient, client::ChromaHttpClientOptions, client::ChromaAuthMethod};
162162
///
163163
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
164-
/// let options = ChromaClientOptions {
164+
/// let options = ChromaHttpClientOptions {
165165
/// endpoint: "https://api.trychroma.com".parse()?,
166166
/// auth_method: ChromaAuthMethod::cloud_api_key("my-key")?,
167167
/// ..Default::default()

rust/chroma/src/client/options.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,10 +157,10 @@ impl ChromaHttpClientOptions {
157157
/// # Examples
158158
///
159159
/// ```no_run
160-
/// use chroma::client::ChromaClientOptions;
160+
/// use chroma::client::ChromaHttpClientOptions;
161161
///
162162
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
163-
/// let options = ChromaClientOptions::from_env()?;
163+
/// let options = ChromaHttpClientOptions::from_env()?;
164164
/// # Ok(())
165165
/// # }
166166
/// ```
@@ -200,10 +200,10 @@ impl ChromaHttpClientOptions {
200200
/// # Examples
201201
///
202202
/// ```no_run
203-
/// use chroma::client::ChromaClientOptions;
203+
/// use chroma::client::ChromaHttpClientOptions;
204204
///
205205
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
206-
/// let options = ChromaClientOptions::from_cloud_env()?;
206+
/// let options = ChromaHttpClientOptions::from_cloud_env()?;
207207
/// # Ok(())
208208
/// # }
209209
/// ```
@@ -241,10 +241,10 @@ impl ChromaHttpClientOptions {
241241
/// # Examples
242242
///
243243
/// ```
244-
/// use chroma::client::ChromaClientOptions;
244+
/// use chroma::client::ChromaHttpClientOptions;
245245
///
246246
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
247-
/// let options = ChromaClientOptions::cloud("my-api-key", "production-db")?;
247+
/// let options = ChromaHttpClientOptions::cloud("my-api-key", "production-db")?;
248248
/// # Ok(())
249249
/// # }
250250
/// ```

rust/chroma/src/collection.rs

Lines changed: 121 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ use crate::{client::ChromaHttpClientError, ChromaHttpClient};
4242
///
4343
/// ```
4444
/// # use chroma::collection::ChromaCollection;
45-
/// # use chroma::client::ChromaClientError;
46-
/// # async fn example(collection: ChromaCollection) -> Result<(), ChromaClientError> {
45+
/// # use chroma::client::ChromaHttpClientError;
46+
/// # async fn example(collection: ChromaCollection) -> Result<(), ChromaHttpClientError> {
4747
/// let count = collection.count().await?;
4848
/// println!("Collection contains {} records", count);
4949
///
@@ -325,87 +325,144 @@ impl ChromaCollection {
325325
self.send("query", Method::POST, Some(request)).await
326326
}
327327

328-
/// Executes advanced search with multiple search payloads in a single request.
328+
/// Performs hybrid search on the collection using the Search API.
329329
///
330-
/// Each [`SearchPayload`] can specify distinct query vectors, filters, and result counts,
331-
/// enabling efficient batch similarity search with heterogeneous parameters.
330+
/// The Search API provides a powerful, flexible interface for vector similarity search
331+
/// combined with metadata filtering and custom ranking expressions.
332332
///
333-
/// # Errors
333+
/// # Arguments
334334
///
335-
/// Returns an error if:
336-
/// - Any search payload fails validation
337-
/// - Network communication fails
335+
/// * `searches` - One or more search payloads to execute in a single request
336+
///
337+
/// # Returns
338+
///
339+
/// A `SearchResponse` containing results for each search payload
338340
///
339341
/// # Examples
340342
///
341-
/// Basic search with default parameters:
343+
/// ## Basic similarity search
344+
///
342345
/// ```
343-
/// # use chroma::collection::ChromaCollection;
344-
/// # use chroma_types::plan::SearchPayload;
345-
/// # async fn example(collection: ChromaCollection) -> Result<(), Box<dyn std::error::Error>> {
346-
/// let search1 = SearchPayload {
347-
/// filter: Default::default(),
348-
/// rank: Default::default(),
349-
/// limit: Default::default(),
350-
/// select: Default::default(),
346+
/// use chroma_types::plan::SearchPayload;
347+
/// use chroma_types::operator::{RankExpr, QueryVector, Key};
348+
///
349+
/// # async fn example(collection: &chroma::collection::ChromaCollection) -> Result<(), Box<dyn std::error::Error>> {
350+
/// // Search with a query vector
351+
/// let search = SearchPayload::default()
352+
/// .rank(RankExpr::Knn {
353+
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3]),
354+
/// key: Key::Embedding,
355+
/// limit: 100,
356+
/// default: None,
357+
/// return_rank: false,
358+
/// })
359+
/// .limit(Some(10), 0)
360+
/// .select([Key::Document, Key::Score]);
361+
///
362+
/// let results = collection.search(vec![search]).await?;
363+
/// # Ok(())
364+
/// # }
365+
/// ```
366+
///
367+
/// ## Filtered search with metadata
368+
///
369+
/// ```
370+
/// use chroma_types::plan::SearchPayload;
371+
/// use chroma_types::operator::{RankExpr, QueryVector, Key};
372+
///
373+
/// # async fn example(collection: &chroma::collection::ChromaCollection) -> Result<(), Box<dyn std::error::Error>> {
374+
/// // Filter by category and year, then rank by similarity
375+
/// let search = SearchPayload::default()
376+
/// .r#where(
377+
/// Key::field("category").eq("science")
378+
/// & Key::field("year").gte(2020)
379+
/// )
380+
/// .rank(RankExpr::Knn {
381+
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3]),
382+
/// key: Key::Embedding,
383+
/// limit: 200,
384+
/// default: None,
385+
/// return_rank: false,
386+
/// })
387+
/// .limit(Some(5), 0)
388+
/// .select([Key::Document, Key::Score, Key::field("title")]);
389+
///
390+
/// let results = collection.search(vec![search]).await?;
391+
/// # Ok(())
392+
/// # }
393+
/// ```
394+
///
395+
/// ## Hybrid search with custom ranking
396+
///
397+
/// ```
398+
/// use chroma_types::plan::SearchPayload;
399+
/// use chroma_types::operator::{RankExpr, QueryVector, Key};
400+
///
401+
/// # async fn example(collection: &chroma::collection::ChromaCollection) -> Result<(), Box<dyn std::error::Error>> {
402+
/// // Combine two KNN searches with weights
403+
/// let dense_knn = RankExpr::Knn {
404+
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3]),
405+
/// key: Key::Embedding,
406+
/// limit: 200,
407+
/// default: None,
408+
/// return_rank: false,
351409
/// };
352-
/// let search2 = SearchPayload {
353-
/// filter: Default::default(),
354-
/// rank: Default::default(),
355-
/// limit: Default::default(),
356-
/// select: Default::default(),
410+
///
411+
/// let sparse_knn = RankExpr::Knn {
412+
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3]), // Use sparse vector in practice
413+
/// key: Key::field("sparse_embedding"),
414+
/// limit: 200,
415+
/// default: None,
416+
/// return_rank: false,
357417
/// };
358418
///
359-
/// let response = collection.search(vec![search1, search2]).await?;
360-
/// println!("Executed {} searches", response.ids.len());
419+
/// // Weighted combination: 70% dense + 30% sparse
420+
/// let hybrid_rank = dense_knn * 0.7 + sparse_knn * 0.3;
421+
///
422+
/// let search = SearchPayload::default()
423+
/// .rank(hybrid_rank)
424+
/// .limit(Some(10), 0)
425+
/// .select([Key::Document, Key::Score]);
426+
///
427+
/// let results = collection.search(vec![search]).await?;
361428
/// # Ok(())
362429
/// # }
363430
/// ```
364431
///
365-
/// Advanced search with all fields configured:
432+
/// ## Batch operations
433+
///
366434
/// ```
367-
/// # use chroma::collection::ChromaCollection;
368-
/// # use chroma_types::plan::SearchPayload;
369-
/// # use chroma_types::{Filter, Rank, RankExpr, QueryVector, Key, Limit, Select};
370-
/// # use chroma_types::{Where, MetadataExpression, MetadataComparison, PrimitiveOperator, MetadataValue};
371-
/// # use std::collections::HashSet;
372-
/// # async fn example(collection: ChromaCollection) -> Result<(), Box<dyn std::error::Error>> {
373-
/// let search = SearchPayload {
374-
/// filter: Filter {
375-
/// query_ids: Some(vec!["doc1".to_string(), "doc2".to_string()]),
376-
/// where_clause: Some(Where::Metadata(MetadataExpression {
377-
/// key: "category".to_string(),
378-
/// comparison: MetadataComparison::Primitive(
379-
/// PrimitiveOperator::Equal,
380-
/// MetadataValue::Str("research".to_string()),
381-
/// ),
382-
/// })),
383-
/// },
384-
/// rank: Rank {
385-
/// expr: Some(RankExpr::Knn {
386-
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3, 0.4]),
435+
/// use chroma_types::plan::SearchPayload;
436+
/// use chroma_types::operator::{RankExpr, QueryVector, Key};
437+
///
438+
/// # async fn example(collection: &chroma::collection::ChromaCollection) -> Result<(), Box<dyn std::error::Error>> {
439+
/// // Run multiple searches in one request
440+
/// let searches = vec![
441+
/// SearchPayload::default()
442+
/// .r#where(Key::field("category").eq("tech"))
443+
/// .rank(RankExpr::Knn {
444+
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3]),
387445
/// key: Key::Embedding,
388-
/// limit: 50,
446+
/// limit: 100,
389447
/// default: None,
390448
/// return_rank: false,
391-
/// }),
392-
/// },
393-
/// limit: Limit {
394-
/// offset: 0,
395-
/// limit: Some(10),
396-
/// },
397-
/// select: Select {
398-
/// keys: HashSet::from([
399-
/// Key::Document,
400-
/// Key::Metadata,
401-
/// Key::Embedding,
402-
/// Key::Score,
403-
/// ]),
404-
/// },
405-
/// };
449+
/// })
450+
/// .limit(Some(5), 0),
451+
/// SearchPayload::default()
452+
/// .r#where(Key::field("category").eq("science"))
453+
/// .rank(RankExpr::Knn {
454+
/// query: QueryVector::Dense(vec![0.1, 0.2, 0.3]),
455+
/// key: Key::Embedding,
456+
/// limit: 100,
457+
/// default: None,
458+
/// return_rank: false,
459+
/// })
460+
/// .limit(Some(5), 0),
461+
/// ];
406462
///
407-
/// let response = collection.search(vec![search]).await?;
408-
/// println!("Found {} results", response.ids[0].len());
463+
/// let results = collection.search(searches).await?;
464+
/// // results.results[0] contains the first search's results
465+
/// // results.results[1] contains the second search's results
409466
/// # Ok(())
410467
/// # }
411468
/// ```

0 commit comments

Comments
 (0)