From 0c370a97667fb64b4ccd9c1ca8b72ddf11815a3c Mon Sep 17 00:00:00 2001 From: Monan Date: Thu, 15 May 2025 13:53:41 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E6=9B=B4=E6=96=B0Python=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E8=87=B33.9.18=EF=BC=8C=E5=B9=B6=E5=9C=A8chroma=5Fadd=5Fdocume?= =?UTF-8?q?nts=E5=87=BD=E6=95=B0=E4=B8=AD=E5=A2=9E=E5=BC=BA=E4=BA=86ID?= =?UTF-8?q?=E5=8F=82=E6=95=B0=E7=9A=84=E9=AA=8C=E8=AF=81=EF=BC=8C=E7=A1=AE?= =?UTF-8?q?=E4=BF=9DID=E5=88=97=E8=A1=A8=E4=B8=8D=E4=B8=BA=E7=A9=BA?= =?UTF-8?q?=E4=B8=94=E4=B8=8E=E6=96=87=E6=A1=A3=E6=95=B0=E9=87=8F=E5=8C=B9?= =?UTF-8?q?=E9=85=8D=EF=BC=8C=E5=90=8C=E6=97=B6=E6=B7=BB=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E5=AF=B9=E9=87=8D=E5=A4=8DID=E7=9A=84=E6=A3=80=E6=9F=A5?= =?UTF-8?q?=E5=92=8C=E8=BF=94=E5=9B=9E=E5=80=BC=E7=9A=84=E5=A4=84=E7=90=86?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .python-version | 2 +- src/chroma_mcp/server.py | 43 ++++++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/.python-version b/.python-version index c8cfe39..43077b2 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.10 +3.9.18 diff --git a/src/chroma_mcp/server.py b/src/chroma_mcp/server.py index b2f728f..2e9c157 100644 --- a/src/chroma_mcp/server.py +++ b/src/chroma_mcp/server.py @@ -377,35 +377,62 @@ async def chroma_delete_collection(collection_name: str) -> str: async def chroma_add_documents( collection_name: str, documents: List[str], - metadatas: Optional[List[Dict]] = None, - ids: Optional[List[str]] = None + ids: List[str], + metadatas: List[Dict] ) -> str: """Add documents to a Chroma collection. Args: collection_name: Name of the collection to add documents to documents: List of text documents to add + ids: List of IDs for the documents (required) metadatas: Optional list of metadata dictionaries for each document - ids: Optional list of IDs for the documents """ if not documents: raise ValueError("The 'documents' list cannot be empty.") + + if not ids: + raise ValueError("The 'ids' list is required and cannot be empty.") + + # 检查ids列表中是否有空字符串 + if any(not id.strip() for id in ids): + raise ValueError("IDs cannot be empty strings.") + + if len(ids) != len(documents): + raise ValueError(f"Number of ids ({len(ids)}) must match number of documents ({len(documents)}).") client = get_chroma_client() try: collection = client.get_or_create_collection(collection_name) - # Generate sequential IDs if none provided - if ids is None: - ids = [str(i) for i in range(len(documents))] + # 检查是否有重复的ID + existing_ids = collection.get(include=[])["ids"] + duplicate_ids = [id for id in ids if id in existing_ids] + + if duplicate_ids: + raise ValueError( + f"The following IDs already exist in collection '{collection_name}': {duplicate_ids}. " + f"Use 'chroma_update_documents' to update existing documents." + ) - collection.add( + result = collection.add( documents=documents, metadatas=metadatas, ids=ids ) - return f"Successfully added {len(documents)} documents to collection {collection_name}" + # 判断返回值 + if result and isinstance(result, dict): + # 如果返回值是字典,可能包含成功信息 + if 'success' in result and not result['success']: + raise Exception(f"Failed to add documents: {result.get('error', 'Unknown error')}") + + # 如果返回值包含实际添加的数量 + if 'count' in result: + return f"Successfully added {result['count']} documents to collection {collection_name}" + + # 默认返回 + return f"Successfully added {len(documents)} documents to collection {collection_name}, result is {result}" except Exception as e: raise Exception(f"Failed to add documents to collection '{collection_name}': {str(e)}") from e From 380784f4720a6d5ad97a00de3b0c4124780da127 Mon Sep 17 00:00:00 2001 From: Monan Date: Thu, 15 May 2025 15:35:05 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E5=8E=BB=E6=8E=89Optional=E4=BF=AE?= =?UTF-8?q?=E9=A5=B0=EF=BC=8C=E8=A7=A3=E5=86=B3MCP=E5=8F=82=E6=95=B0?= =?UTF-8?q?=E4=B8=8D=E8=AF=86=E5=88=AB=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chroma_mcp/server.py | 62 ++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/chroma_mcp/server.py b/src/chroma_mcp/server.py index 2e9c157..79c4327 100644 --- a/src/chroma_mcp/server.py +++ b/src/chroma_mcp/server.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional, TypedDict +from typing import Dict, List, TypedDict from enum import Enum import chromadb from mcp.server.fastmcp import FastMCP @@ -145,8 +145,8 @@ def get_chroma_client(args=None): @mcp.tool() async def chroma_list_collections( - limit: Optional[int] = None, - offset: Optional[int] = None + limit: int = None, + offset: int = None ) -> List[str]: """List all collection names in the Chroma database with pagination support. @@ -177,16 +177,16 @@ async def chroma_list_collections( @mcp.tool() async def chroma_create_collection( collection_name: str, - embedding_function_name: Optional[str] = "default", - metadata: Optional[Dict] = None, - space: Optional[str] = None, - ef_construction: Optional[int] = None, - ef_search: Optional[int] = None, - max_neighbors: Optional[int] = None, - num_threads: Optional[int] = None, - batch_size: Optional[int] = None, - sync_threshold: Optional[int] = None, - resize_factor: Optional[float] = None, + embedding_function_name: str = "default", + metadata: Dict = None, + space: str = None, + ef_construction: int = None, + ef_search: int = None, + max_neighbors: int = None, + num_threads: int = None, + batch_size: int = None, + sync_threshold: int = None, + resize_factor: float = None, ) -> str: """Create a new Chroma collection with configurable HNSW parameters. @@ -305,13 +305,13 @@ async def chroma_get_collection_count(collection_name: str) -> int: @mcp.tool() async def chroma_modify_collection( collection_name: str, - new_name: Optional[str] = None, - new_metadata: Optional[Dict] = None, - ef_search: Optional[int] = None, - num_threads: Optional[int] = None, - batch_size: Optional[int] = None, - sync_threshold: Optional[int] = None, - resize_factor: Optional[float] = None, + new_name: str = None, + new_metadata: Dict = None, + ef_search: int = None, + num_threads: int = None, + batch_size: int = None, + sync_threshold: int = None, + resize_factor: float = None, ) -> str: """Modify a Chroma collection's name or metadata. @@ -378,7 +378,7 @@ async def chroma_add_documents( collection_name: str, documents: List[str], ids: List[str], - metadatas: List[Dict] + metadatas: List[Dict] = None ) -> str: """Add documents to a Chroma collection. @@ -441,8 +441,8 @@ async def chroma_query_documents( collection_name: str, query_texts: List[str], n_results: int = 5, - where: Optional[Dict] = None, - where_document: Optional[Dict] = None, + where: Dict = None, + where_document: Dict = None, include: List[str] = ["documents", "metadatas", "distances"] ) -> Dict: """Query documents from a Chroma collection with advanced filtering. @@ -479,12 +479,12 @@ async def chroma_query_documents( @mcp.tool() async def chroma_get_documents( collection_name: str, - ids: Optional[List[str]] = None, - where: Optional[Dict] = None, - where_document: Optional[Dict] = None, + ids: List[str] = None, + where: Dict = None, + where_document: Dict = None, include: List[str] = ["documents", "metadatas"], - limit: Optional[int] = None, - offset: Optional[int] = None + limit: int = None, + offset: int = None ) -> Dict: """Get documents from a Chroma collection with optional filtering. @@ -523,9 +523,9 @@ async def chroma_get_documents( async def chroma_update_documents( collection_name: str, ids: List[str], - embeddings: Optional[List[List[float]]] = None, - metadatas: Optional[List[Dict]] = None, - documents: Optional[List[str]] = None + embeddings: List[List[float]] = None, + metadatas: List[Dict] = None, + documents: List[str] = None ) -> str: """Update documents in a Chroma collection. From 7d50fed8685afeb9eb38582d72d91032c4834c44 Mon Sep 17 00:00:00 2001 From: Monan Date: Thu, 15 May 2025 15:42:53 +0800 Subject: [PATCH 3/4] Revert python version change --- .python-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.python-version b/.python-version index 43077b2..c8cfe39 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.9.18 +3.10 From 8723a47368d7f16af9ef064081fbbdc674c7719a Mon Sep 17 00:00:00 2001 From: Monan Date: Thu, 15 May 2025 18:10:22 +0800 Subject: [PATCH 4/4] English comments --- src/chroma_mcp/server.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/chroma_mcp/server.py b/src/chroma_mcp/server.py index 79c4327..dbe8ddf 100644 --- a/src/chroma_mcp/server.py +++ b/src/chroma_mcp/server.py @@ -394,7 +394,7 @@ async def chroma_add_documents( if not ids: raise ValueError("The 'ids' list is required and cannot be empty.") - # 检查ids列表中是否有空字符串 + # Check if there are empty strings in the ids list if any(not id.strip() for id in ids): raise ValueError("IDs cannot be empty strings.") @@ -405,7 +405,7 @@ async def chroma_add_documents( try: collection = client.get_or_create_collection(collection_name) - # 检查是否有重复的ID + # Check for duplicate IDs existing_ids = collection.get(include=[])["ids"] duplicate_ids = [id for id in ids if id in existing_ids] @@ -421,17 +421,17 @@ async def chroma_add_documents( ids=ids ) - # 判断返回值 + # Check the return value if result and isinstance(result, dict): - # 如果返回值是字典,可能包含成功信息 + # If the return value is a dictionary, it may contain success information if 'success' in result and not result['success']: raise Exception(f"Failed to add documents: {result.get('error', 'Unknown error')}") - # 如果返回值包含实际添加的数量 + # If the return value contains the actual number added if 'count' in result: return f"Successfully added {result['count']} documents to collection {collection_name}" - # 默认返回 + # Default return return f"Successfully added {len(documents)} documents to collection {collection_name}, result is {result}" except Exception as e: raise Exception(f"Failed to add documents to collection '{collection_name}': {str(e)}") from e