
Commit 32fceb6

Ronak Bhalgami authored and committed
fix: Use configured LLM model in categorization instead of hardcoded gpt-4o-mini
- Fixed hardcoded model issue in openmemory categorization
- Categorization now reads LLM config from database
- Supports custom base_url for OpenAI-compatible providers (SiliconFlow, etc.)
- Added SiliconFlowConfig class for proper configuration
- Updated factory to use SiliconFlowConfig
- Added SiliconFlow documentation with usage examples

Fixes an issue where SiliconFlow users got a 'Model does not exist' error because categorization was hardcoded to gpt-4o-mini instead of using the configured model (e.g., deepseek-ai/DeepSeek-R1).

Resolves: mem0ai#3574
1 parent d5a130b commit 32fceb6

File tree: 4 files changed, +215 −3 lines changed
New file: SiliconFlow documentation page

Lines changed: 97 additions & 0 deletions
---
title: SiliconFlow
---

[SiliconFlow](https://siliconflow.com/) is an AI inference platform that provides access to various open-source LLMs including DeepSeek, Qwen, GLM, and more.

To use LLMs from SiliconFlow, go to their [platform](https://siliconflow.com/) and get an API key. Set it as the `SILICONFLOW_API_KEY` environment variable and use the model as shown in the example below.

## Usage

<CodeGroup>
```python Python
import os
from mem0 import Memory

os.environ["OPENAI_API_KEY"] = "your-api-key"  # used for embedding model
os.environ["SILICONFLOW_API_KEY"] = "your-api-key"

config = {
    "llm": {
        "provider": "siliconflow",
        "config": {
            "model": "deepseek-ai/DeepSeek-V3",
            "temperature": 0.1,
            "max_tokens": 2000,
        }
    }
}

m = Memory.from_config(config)
messages = [
    {"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"},
    {"role": "assistant", "content": "How about thriller movies? They can be quite engaging."},
    {"role": "user", "content": "I'm not a big fan of thriller movies but I love sci-fi movies."},
    {"role": "assistant", "content": "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future."}
]
m.add(messages, user_id="alice", metadata={"category": "movies"})
```

```typescript TypeScript
import { Memory } from 'mem0ai/oss';

const config = {
  llm: {
    provider: 'siliconflow',
    config: {
      apiKey: process.env.SILICONFLOW_API_KEY || '',
      model: 'deepseek-ai/DeepSeek-V3',
      temperature: 0.1,
      maxTokens: 2000,
    },
  },
};

const memory = new Memory(config);
const messages = [
  { role: "user", content: "I'm planning to watch a movie tonight. Any recommendations?" },
  { role: "assistant", content: "How about thriller movies? They can be quite engaging." },
  { role: "user", content: "I'm not a big fan of thriller movies but I love sci-fi movies." },
  { role: "assistant", content: "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future." },
];
await memory.add(messages, { userId: "alice", metadata: { category: "movies" } });
```
</CodeGroup>

## Custom Base URL

If you're using a different region (e.g., China), you can specify a custom base URL:

```python
config = {
    "llm": {
        "provider": "siliconflow",
        "config": {
            "model": "deepseek-ai/DeepSeek-V3",
            "base_url": "https://api.siliconflow.cn/v1",  # For China region
            "temperature": 0.1,
            "max_tokens": 2000,
        }
    }
}
```

## Supported Models

SiliconFlow supports various models including:

- deepseek-ai/DeepSeek-V3
- deepseek-ai/DeepSeek-R1
- Qwen/Qwen2.5-72B-Instruct
- THUDM/GLM-4-9B
- meta-llama/Meta-Llama-3.1-8B-Instruct

And many more. Check the [SiliconFlow documentation](https://docs.siliconflow.com/) for the full list.

## Config

All available parameters for the `siliconflow` config are present in [Master List of All Params in Config](../config).

mem0/configs/llms/siliconflow.py

Lines changed: 62 additions & 0 deletions
from typing import Any, Callable, Optional

from mem0.configs.llms.base import BaseLlmConfig


class SiliconFlowConfig(BaseLlmConfig):
    """
    Configuration class for SiliconFlow-specific parameters.
    Inherits from BaseLlmConfig and adds SiliconFlow-specific settings.
    """

    def __init__(
        self,
        # Base parameters
        model: Optional[str] = None,
        temperature: float = 0.1,
        api_key: Optional[str] = None,
        max_tokens: int = 2000,
        top_p: float = 0.1,
        top_k: int = 1,
        enable_vision: bool = False,
        vision_details: Optional[str] = "auto",
        http_client_proxies: Optional[dict] = None,
        # SiliconFlow-specific parameters
        base_url: Optional[str] = None,
        # Response monitoring callback
        response_callback: Optional[Callable[[Any, dict, dict], None]] = None,
    ):
        """
        Initialize SiliconFlow configuration.

        Args:
            model: SiliconFlow model to use, defaults to "Qwen/Qwen2.5-7B-Instruct"
            temperature: Controls randomness, defaults to 0.1
            api_key: SiliconFlow API key, defaults to None
            max_tokens: Maximum tokens to generate, defaults to 2000
            top_p: Nucleus sampling parameter, defaults to 0.1
            top_k: Top-k sampling parameter, defaults to 1
            enable_vision: Enable vision capabilities, defaults to False
            vision_details: Vision detail level, defaults to "auto"
            http_client_proxies: HTTP client proxy settings, defaults to None
            base_url: SiliconFlow API base URL, defaults to "https://api.siliconflow.com/v1"
            response_callback: Optional callback for monitoring LLM responses.
        """
        # Initialize base parameters
        super().__init__(
            model=model,
            temperature=temperature,
            api_key=api_key,
            max_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            enable_vision=enable_vision,
            vision_details=vision_details,
            http_client_proxies=http_client_proxies,
        )

        # SiliconFlow-specific parameters
        self.base_url = base_url

        # Response monitoring
        self.response_callback = response_callback
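
As a quick illustration, here is a minimal sketch of instantiating the new config class directly; the specific values are examples only, and `base_url` stays `None` unless you override it (the docstring notes the provider's default endpoint):

```python
# Illustrative usage of the config class added above; values are examples only.
from mem0.configs.llms.siliconflow import SiliconFlowConfig

cfg = SiliconFlowConfig(
    model="deepseek-ai/DeepSeek-V3",
    api_key="your-siliconflow-api-key",
    base_url="https://api.siliconflow.cn/v1",  # optional; None means the default endpoint
    temperature=0.1,
    max_tokens=2000,
)

print(cfg.model, cfg.base_url)
```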

mem0/utils/factory.py

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@
 from mem0.configs.llms.lmstudio import LMStudioConfig
 from mem0.configs.llms.ollama import OllamaConfig
 from mem0.configs.llms.openai import OpenAIConfig
+from mem0.configs.llms.siliconflow import SiliconFlowConfig
 from mem0.configs.llms.vllm import VllmConfig
 from mem0.configs.rerankers.base import BaseRerankerConfig
 from mem0.configs.rerankers.cohere import CohereRerankerConfig
@@ -36,6 +37,7 @@ class LlmFactory:
         "ollama": ("mem0.llms.ollama.OllamaLLM", OllamaConfig),
         "openai": ("mem0.llms.openai.OpenAILLM", OpenAIConfig),
         "groq": ("mem0.llms.groq.GroqLLM", BaseLlmConfig),
+        "siliconflow": ("mem0.llms.siliconflow.SiliconFlowLLM", SiliconFlowConfig),
         "together": ("mem0.llms.together.TogetherLLM", BaseLlmConfig),
         "aws_bedrock": ("mem0.llms.aws_bedrock.AWSBedrockLLM", BaseLlmConfig),
         "litellm": ("mem0.llms.litellm.LiteLLM", BaseLlmConfig),

openmemory/api/app/utils/categorization.py

Lines changed: 54 additions & 3 deletions
@@ -1,31 +1,82 @@
 import logging
-from typing import List
+import os
+from typing import List, Optional

+from app.database import SessionLocal
+from app.models import Config as ConfigModel
 from app.utils.prompts import MEMORY_CATEGORIZATION_PROMPT
 from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 from tenacity import retry, stop_after_attempt, wait_exponential

 load_dotenv()
-openai_client = OpenAI()


 class MemoryCategories(BaseModel):
     categories: List[str]


+def get_llm_config():
+    """Get LLM configuration from database or use defaults."""
+    try:
+        db = SessionLocal()
+        db_config = db.query(ConfigModel).filter(ConfigModel.key == "main").first()
+
+        if db_config and "mem0" in db_config.value and "llm" in db_config.value["mem0"]:
+            llm_config = db_config.value["mem0"]["llm"]
+            db.close()
+            return llm_config
+
+        db.close()
+    except Exception as e:
+        logging.warning(f"Failed to load LLM config from database: {e}")
+
+    # Default configuration
+    return {
+        "provider": "openai",
+        "config": {
+            "model": "gpt-4o-mini",
+            "api_key": os.getenv("OPENAI_API_KEY")
+        }
+    }
+
+
+def parse_env_value(value):
+    """Parse environment variable references in config values."""
+    if isinstance(value, str) and value.startswith("env:"):
+        env_var = value.split(":", 1)[1]
+        return os.getenv(env_var)
+    return value
+
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=15))
 def get_categories_for_memory(memory: str) -> List[str]:
     try:
+        # Get LLM configuration
+        llm_config = get_llm_config()
+        config = llm_config.get("config", {})
+
+        # Parse environment variables
+        api_key = parse_env_value(config.get("api_key", os.getenv("OPENAI_API_KEY")))
+        model = config.get("model", "gpt-4o-mini")
+        base_url = parse_env_value(config.get("openai_base_url")) if "openai_base_url" in config else None
+
+        # Create OpenAI client with configured settings
+        client_kwargs = {"api_key": api_key}
+        if base_url:
+            client_kwargs["base_url"] = base_url
+
+        openai_client = OpenAI(**client_kwargs)
+
         messages = [
             {"role": "system", "content": MEMORY_CATEGORIZATION_PROMPT},
             {"role": "user", "content": memory}
         ]

         # Let OpenAI handle the pydantic parsing directly
         completion = openai_client.beta.chat.completions.parse(
-            model="gpt-4o-mini",
+            model=model,
             messages=messages,
             response_format=MemoryCategories,
             temperature=0
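
As a rough illustration (not part of the commit), this is the kind of stored `main` config record that the new `get_llm_config()` reads, and how a `parse_env_value`-style lookup resolves the `env:` reference; the exact record contents are assumptions:

```python
import os

# Hypothetical "main" config value as stored in the openmemory database.
stored_value = {
    "mem0": {
        "llm": {
            "provider": "openai",  # any OpenAI-compatible provider
            "config": {
                "model": "deepseek-ai/DeepSeek-R1",
                "api_key": "env:SILICONFLOW_API_KEY",
                "openai_base_url": "https://api.siliconflow.cn/v1",
            },
        }
    }
}

config = stored_value["mem0"]["llm"]["config"]

# Mirrors parse_env_value: "env:NAME" is replaced by the environment variable's value.
api_key = config["api_key"]
if api_key.startswith("env:"):
    api_key = os.getenv(api_key.split(":", 1)[1])

# Categorization now calls the configured model at the configured endpoint
# instead of the previously hardcoded gpt-4o-mini.
client_kwargs = {"api_key": api_key, "base_url": config["openai_base_url"]}
print(config["model"], client_kwargs["base_url"])
```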
