Skip to content

Enable following symlinks #13

@Stratouklos

Description

@Stratouklos

Love the project! It worked nicely; however, in my current setup I found it very useful to enable following symlinks for documents.

Example diff (vibe-coded); please check for correctness.

diff --git a/mcp_server/config.py b/mcp_server/config.py
index a91aca6..945a24f 100644
--- a/mcp_server/config.py
+++ b/mcp_server/config.py
@@ -17,7 +17,7 @@ def _has_documents(path: Path) -> bool:
     docs_dir = path / "documents"
     if not docs_dir.exists():
         return False
-    return any(docs_dir.rglob("*.*"))
+    return any(f for _, _, files in os.walk(docs_dir, followlinks=True) for f in files)
 
 
 if os.environ.get("KNOWLEDGE_RAG_DIR"):
diff --git a/mcp_server/ingestion.py b/mcp_server/ingestion.py
index 450b1ce..e028ece 100644
--- a/mcp_server/ingestion.py
+++ b/mcp_server/ingestion.py
@@ -7,6 +7,7 @@ Supports: MD, PDF, TXT, PY, JSON, DOCX, XLSX, PPTX, CSV
 import hashlib
 import json
 import re
+import os
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
@@ -150,19 +151,26 @@ class DocumentParser:
         return doc
 
     def parse_directory(self, directory: Path = None) -> List[Document]:
-        """Parse all supported files in a directory recursively"""
+        """Parse all supported files in a directory recursively (follows symlinks)"""
         directory = Path(directory) if directory else config.documents_dir
         documents = []
+        seen = set()
 
-        for ext in config.supported_formats:
-            for filepath in directory.rglob(f"*{ext}"):
-                try:
-                    doc = self.parse_file(filepath)
-                    if doc:
-                        documents.append(doc)
-                except Exception as e:
-                    print(f"[WARN] Failed to parse {filepath}: {e}")
-
+        for root, dirs, files in os.walk(directory, followlinks=True):
+            root_path = Path(root)
+            real_root = root_path.resolve()
+            if real_root in seen:
+                continue
+            seen.add(real_root)
+            for fname in files:
+                filepath = root_path / fname
+                if filepath.suffix.lower() in config.supported_formats:
+                    try:
+                        doc = self.parse_file(filepath)
+                        if doc:
+                            documents.append(doc)
+                    except Exception as e:
+                        print(f"[WARN] Failed to parse {filepath}: {e}")
         return documents
 
     # =========================================================================

Thank you!!!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions