Love the project! It worked nicely; however, in my current setup I found it very useful to enable following symlinks for documents.
Example diff (vibe-coded); please check for correctness.
diff --git a/mcp_server/config.py b/mcp_server/config.py
index a91aca6..945a24f 100644
--- a/mcp_server/config.py
+++ b/mcp_server/config.py
@@ -17,7 +17,7 @@ def _has_documents(path: Path) -> bool:
docs_dir = path / "documents"
if not docs_dir.exists():
return False
- return any(docs_dir.rglob("*.*"))
+ return any(f for _, _, files in os.walk(docs_dir, followlinks=True) for f in files)
if os.environ.get("KNOWLEDGE_RAG_DIR"):
diff --git a/mcp_server/ingestion.py b/mcp_server/ingestion.py
index 450b1ce..e028ece 100644
--- a/mcp_server/ingestion.py
+++ b/mcp_server/ingestion.py
@@ -7,6 +7,7 @@ Supports: MD, PDF, TXT, PY, JSON, DOCX, XLSX, PPTX, CSV
import hashlib
import json
import re
+import os
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
@@ -150,19 +151,26 @@ class DocumentParser:
return doc
def parse_directory(self, directory: Path = None) -> List[Document]:
- """Parse all supported files in a directory recursively"""
+ """Parse all supported files in a directory recursively (follows symlinks)"""
directory = Path(directory) if directory else config.documents_dir
documents = []
+ seen = set()
- for ext in config.supported_formats:
- for filepath in directory.rglob(f"*{ext}"):
- try:
- doc = self.parse_file(filepath)
- if doc:
- documents.append(doc)
- except Exception as e:
- print(f"[WARN] Failed to parse {filepath}: {e}")
-
+ for root, dirs, files in os.walk(directory, followlinks=True):
+ root_path = Path(root)
+ real_root = root_path.resolve()
+ if real_root in seen:
+ continue
+ seen.add(real_root)
+ for fname in files:
+ filepath = root_path / fname
+ if filepath.suffix.lower() in config.supported_formats:
+ try:
+ doc = self.parse_file(filepath)
+ if doc:
+ documents.append(doc)
+ except Exception as e:
+ print(f"[WARN] Failed to parse {filepath}: {e}")
return documents
# =========================================================================
Love the project! It worked nicely; however, in my current setup I found it very useful to enable following symlinks for documents.
Example diff (vibe-coded); please check for correctness.
Thank you!!!