Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cookbook/demo/agno_knowledge_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
description="A knowledge base containing Agno documentation and other relevant information",
vector_db=PgVector(
db_url=db_url,
table_name="agno_knowledge",
table_name="agno_knowledges",
search_type=SearchType.hybrid,
embedder=OpenAIEmbedder(id="text-embedding-3-small"),
),
Expand Down
97 changes: 88 additions & 9 deletions libs/agno/agno/knowledge/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import importlib
import inspect
import re
from typing import Dict, List

from agno.knowledge.reader.reader_factory import ReaderFactory
from agno.knowledge.types import ContentType
from agno.utils.log import log_debug
from agno.utils.log import log_debug, log_info


def _get_chunker_class(strategy_type):
Expand Down Expand Up @@ -48,21 +51,21 @@ def _import_class(module_name: str, class_name: str):

def get_reader_info(reader_key: str) -> Dict:
"""Get information about a reader without instantiating it."""
# Try to create the reader to get its info, but don't cache it
try:
reader_factory_method = ReaderFactory._get_reader_method(reader_key)

# Create an instance to get the class, then call class methods
reader_instance = reader_factory_method()
reader_class = reader_instance.__class__
# Get the reader class directly without instantiation
reader_class = _get_reader_class(reader_key)

# Call class methods directly
supported_strategies = reader_class.get_supported_chunking_strategies()
supported_content_types = reader_class.get_supported_content_types()

# Get description from the factory method's config
description = _get_reader_description(reader_key)

return {
"id": reader_key,
"name": "".join(word.capitalize() for word in reader_key.split("_")) + "Reader",
"description": reader_instance.description,
"description": description,
"chunking_strategies": [
strategy.value for strategy in supported_strategies
], # Convert enums to string values
Expand All @@ -75,6 +78,82 @@ def get_reader_info(reader_key: str) -> Dict:
raise ValueError(f"Unknown reader: {reader_key}. Error: {str(e)}")


def _get_reader_class(reader_key: str):
    """Resolve the reader class for ``reader_key`` without instantiating it.

    The key is first validated against ``ReaderFactory`` (a
    ``_get_<reader_key>_reader`` attribute must exist). The class is then
    located purely by naming convention:

    * module: ``agno.knowledge.reader.<reader_key>_reader``
    * class:  whatever ``_reader_key_to_class_name(reader_key)`` returns

    Args:
        reader_key: Snake-case reader identifier, e.g. ``"field_labeled_csv"``.

    Returns:
        The reader class object found in the conventionally named module.

    Raises:
        ValueError: If ``ReaderFactory`` has no factory method for the key, or
            if the module cannot be imported / does not expose the expected
            class. In the latter case the underlying ``ImportError`` or
            ``AttributeError`` is attached as ``__cause__``.
    """
    # Reject keys the factory does not know about before attempting an import.
    factory_method_name = f"_get_{reader_key}_reader"
    if not hasattr(ReaderFactory, factory_method_name):
        raise ValueError(f"Unknown reader: {reader_key}")

    # "field_labeled_csv" -> "agno.knowledge.reader.field_labeled_csv_reader"
    module_name = f"agno.knowledge.reader.{reader_key}_reader"
    class_name = _reader_key_to_class_name(reader_key)

    try:
        module = importlib.import_module(module_name)
        return getattr(module, class_name)
    except (ImportError, AttributeError) as e:
        # Chain explicitly so the original import failure stays visible in the
        # traceback as the direct cause of the ValueError callers catch.
        raise ValueError(f"Failed to import reader class for {reader_key}: {str(e)}") from e


def _reader_key_to_class_name(reader_key: str) -> str:
"""Convert reader key to class name using naming conventions."""
# Special case mappings for readers that use acronym capitalization
# These maintain backward compatibility with existing public API class names
special_cases = {
"pptx": "PPTXReader", # PPTX acronym is all caps
"csv": "CSVReader", # CSV acronym is all caps
"json": "JSONReader", # JSON acronym is all caps
"pdf": "PDFReader", # PDF acronym is all caps
}

if reader_key in special_cases:
return special_cases[reader_key]

# Default: convert snake_case to PascalCase and add "Reader" suffix
# Examples: "field_labeled_csv" -> "FieldLabeledCsvReader"
# "web_search" -> "WebSearchReader"
# "docx" -> "DocxReader"
words = reader_key.split("_")
class_name = "".join(word.capitalize() for word in words) + "Reader"

return class_name


def _get_reader_description(reader_key: str) -> str:
"""Get the description from the factory method's configuration."""
try:
# Get the factory method
method_name = f"_get_{reader_key}_reader"
if not hasattr(ReaderFactory, method_name):
return f"Reader for {reader_key} files"

# Use source inspection to extract the description from the config dict
method = getattr(ReaderFactory, method_name)
source = inspect.getsource(method)

# Look for the description in the config dict
# Pattern: "description": "Some description text"
desc_match = re.search(r'"description":\s*"([^"]+)"', source)

if desc_match:
return desc_match.group(1)
else:
return f"Reader for {reader_key} files"

except Exception:
# Fallback to generic description if extraction fails
return f"Reader for {reader_key} files"


def get_all_readers_info() -> List[Dict]:
"""Get information about all available readers."""
readers_info = []
Expand All @@ -86,7 +165,7 @@ def get_all_readers_info() -> List[Dict]:
except ValueError as e:
# Skip readers with missing dependencies or other issues
# Log the error but don't fail the entire request
log_debug(f"Skipping reader '{key}': {e}")
log_info(f"Skipping reader '{key}': {e}")
continue
return readers_info

Expand Down