
Commit 32fceb6

Ronak Bhalgami authored and committed
fix: Use configured LLM model in categorization instead of hardcoded gpt-4o-mini
- Fixed hardcoded model issue in openmemory categorization
- Categorization now reads LLM config from database
- Supports custom base_url for OpenAI-compatible providers (SiliconFlow, etc.)
- Added SiliconFlowConfig class for proper configuration
- Updated factory to use SiliconFlowConfig
- Added SiliconFlow documentation with usage examples

Fixes an issue where SiliconFlow users got a 'Model does not exist' error because categorization was hardcoded to gpt-4o-mini instead of using the configured model (e.g., deepseek-ai/DeepSeek-R1).

Resolves: mem0ai#3574
1 parent d5a130b commit 32fceb6

File tree: 4 files changed, +215 −3 lines changed
New file: SiliconFlow documentation page

Lines changed: 97 additions & 0 deletions
---
title: SiliconFlow
---

[SiliconFlow](https://siliconflow.com/) is an AI inference platform that provides access to various open-source LLMs including DeepSeek, Qwen, GLM, and more.

To use LLMs from SiliconFlow, go to their [platform](https://siliconflow.com/) and get an API key. Set it as the `SILICONFLOW_API_KEY` environment variable and use the model as shown in the example below.

## Usage

<CodeGroup>
```python Python
import os
from mem0 import Memory

os.environ["OPENAI_API_KEY"] = "your-api-key"  # used for embedding model
os.environ["SILICONFLOW_API_KEY"] = "your-api-key"

config = {
    "llm": {
        "provider": "siliconflow",
        "config": {
            "model": "deepseek-ai/DeepSeek-V3",
            "temperature": 0.1,
            "max_tokens": 2000,
        }
    }
}

m = Memory.from_config(config)
messages = [
    {"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"},
    {"role": "assistant", "content": "How about thriller movies? They can be quite engaging."},
    {"role": "user", "content": "I'm not a big fan of thriller movies but I love sci-fi movies."},
    {"role": "assistant", "content": "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future."}
]
m.add(messages, user_id="alice", metadata={"category": "movies"})
```

```typescript TypeScript
import { Memory } from 'mem0ai/oss';

const config = {
  llm: {
    provider: 'siliconflow',
    config: {
      apiKey: process.env.SILICONFLOW_API_KEY || '',
      model: 'deepseek-ai/DeepSeek-V3',
      temperature: 0.1,
      maxTokens: 2000,
    },
  },
};

const memory = new Memory(config);
const messages = [
  { role: "user", content: "I'm planning to watch a movie tonight. Any recommendations?" },
  { role: "assistant", content: "How about thriller movies? They can be quite engaging." },
  { role: "user", content: "I'm not a big fan of thriller movies but I love sci-fi movies." },
  { role: "assistant", content: "Got it! I'll avoid thriller recommendations and suggest sci-fi movies in the future." },
];
await memory.add(messages, { userId: "alice", metadata: { category: "movies" } });
```
</CodeGroup>

## Custom Base URL

If you're using a different region (e.g., China), you can specify a custom base URL:

```python
config = {
    "llm": {
        "provider": "siliconflow",
        "config": {
            "model": "deepseek-ai/DeepSeek-V3",
            "base_url": "https://api.siliconflow.cn/v1",  # For China region
            "temperature": 0.1,
            "max_tokens": 2000,
        }
    }
}
```

## Supported Models

SiliconFlow supports various models including:

- deepseek-ai/DeepSeek-V3
- deepseek-ai/DeepSeek-R1
- Qwen/Qwen2.5-72B-Instruct
- THUDM/GLM-4-9B
- meta-llama/Meta-Llama-3.1-8B-Instruct

And many more. Check the [SiliconFlow documentation](https://docs.siliconflow.com/) for the full list.

## Config

All available parameters for the `siliconflow` config are present in [Master List of All Params in Config](../config).

mem0/configs/llms/siliconflow.py

Lines changed: 62 additions & 0 deletions
from typing import Any, Callable, Optional

from mem0.configs.llms.base import BaseLlmConfig


class SiliconFlowConfig(BaseLlmConfig):
    """
    Configuration class for SiliconFlow-specific parameters.
    Inherits from BaseLlmConfig and adds SiliconFlow-specific settings.
    """

    def __init__(
        self,
        # Base parameters
        model: Optional[str] = None,
        temperature: float = 0.1,
        api_key: Optional[str] = None,
        max_tokens: int = 2000,
        top_p: float = 0.1,
        top_k: int = 1,
        enable_vision: bool = False,
        vision_details: Optional[str] = "auto",
        http_client_proxies: Optional[dict] = None,
        # SiliconFlow-specific parameters
        base_url: Optional[str] = None,
        # Response monitoring callback
        response_callback: Optional[Callable[[Any, dict, dict], None]] = None,
    ):
        """
        Initialize SiliconFlow configuration.

        Args:
            model: SiliconFlow model to use, defaults to "Qwen/Qwen2.5-7B-Instruct"
            temperature: Controls randomness, defaults to 0.1
            api_key: SiliconFlow API key, defaults to None
            max_tokens: Maximum tokens to generate, defaults to 2000
            top_p: Nucleus sampling parameter, defaults to 0.1
            top_k: Top-k sampling parameter, defaults to 1
            enable_vision: Enable vision capabilities, defaults to False
            vision_details: Vision detail level, defaults to "auto"
            http_client_proxies: HTTP client proxy settings, defaults to None
            base_url: SiliconFlow API base URL, defaults to "https://api.siliconflow.com/v1"
            response_callback: Optional callback for monitoring LLM responses.
        """
        # Initialize base parameters
        super().__init__(
            model=model,
            temperature=temperature,
            api_key=api_key,
            max_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            enable_vision=enable_vision,
            vision_details=vision_details,
            http_client_proxies=http_client_proxies,
        )

        # SiliconFlow-specific parameters
        self.base_url = base_url

        # Response monitoring
        self.response_callback = response_callback
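
As a quick illustration, here is a minimal sketch of instantiating the new config class directly; the specific values are examples only, and `base_url` stays `None` unless you override it (the docstring notes the provider's default endpoint):

```python
# Illustrative usage of the config class added above; values are examples only.
from mem0.configs.llms.siliconflow import SiliconFlowConfig

cfg = SiliconFlowConfig(
    model="deepseek-ai/DeepSeek-V3",
    api_key="your-siliconflow-api-key",
    base_url="https://api.siliconflow.cn/v1",  # optional; None means the default endpoint
    temperature=0.1,
    max_tokens=2000,
)

print(cfg.model, cfg.base_url)
```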

mem0/utils/factory.py

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@
 from mem0.configs.llms.lmstudio import LMStudioConfig
 from mem0.configs.llms.ollama import OllamaConfig
 from mem0.configs.llms.openai import OpenAIConfig
+from mem0.configs.llms.siliconflow import SiliconFlowConfig
 from mem0.configs.llms.vllm import VllmConfig
 from mem0.configs.rerankers.base import BaseRerankerConfig
 from mem0.configs.rerankers.cohere import CohereRerankerConfig
@@ -36,6 +37,7 @@ class LlmFactory:
         "ollama": ("mem0.llms.ollama.OllamaLLM", OllamaConfig),
         "openai": ("mem0.llms.openai.OpenAILLM", OpenAIConfig),
         "groq": ("mem0.llms.groq.GroqLLM", BaseLlmConfig),
+        "siliconflow": ("mem0.llms.siliconflow.SiliconFlowLLM", SiliconFlowConfig),
         "together": ("mem0.llms.together.TogetherLLM", BaseLlmConfig),
         "aws_bedrock": ("mem0.llms.aws_bedrock.AWSBedrockLLM", BaseLlmConfig),
         "litellm": ("mem0.llms.litellm.LiteLLM", BaseLlmConfig),

openmemory/api/app/utils/categorization.py

Lines changed: 54 additions & 3 deletions
@@ -1,31 +1,82 @@
 import logging
-from typing import List
+import os
+from typing import List, Optional

+from app.database import SessionLocal
+from app.models import Config as ConfigModel
 from app.utils.prompts import MEMORY_CATEGORIZATION_PROMPT
 from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 from tenacity import retry, stop_after_attempt, wait_exponential

 load_dotenv()
-openai_client = OpenAI()


 class MemoryCategories(BaseModel):
     categories: List[str]


+def get_llm_config():
+    """Get LLM configuration from database or use defaults."""
+    try:
+        db = SessionLocal()
+        db_config = db.query(ConfigModel).filter(ConfigModel.key == "main").first()
+
+        if db_config and "mem0" in db_config.value and "llm" in db_config.value["mem0"]:
+            llm_config = db_config.value["mem0"]["llm"]
+            db.close()
+            return llm_config
+
+        db.close()
+    except Exception as e:
+        logging.warning(f"Failed to load LLM config from database: {e}")
+
+    # Default configuration
+    return {
+        "provider": "openai",
+        "config": {
+            "model": "gpt-4o-mini",
+            "api_key": os.getenv("OPENAI_API_KEY")
+        }
+    }
+
+
+def parse_env_value(value):
+    """Parse environment variable references in config values."""
+    if isinstance(value, str) and value.startswith("env:"):
+        env_var = value.split(":", 1)[1]
+        return os.getenv(env_var)
+    return value
+
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=15))
 def get_categories_for_memory(memory: str) -> List[str]:
     try:
+        # Get LLM configuration
+        llm_config = get_llm_config()
+        config = llm_config.get("config", {})
+
+        # Parse environment variables
+        api_key = parse_env_value(config.get("api_key", os.getenv("OPENAI_API_KEY")))
+        model = config.get("model", "gpt-4o-mini")
+        base_url = parse_env_value(config.get("openai_base_url")) if "openai_base_url" in config else None
+
+        # Create OpenAI client with configured settings
+        client_kwargs = {"api_key": api_key}
+        if base_url:
+            client_kwargs["base_url"] = base_url
+
+        openai_client = OpenAI(**client_kwargs)
+
         messages = [
             {"role": "system", "content": MEMORY_CATEGORIZATION_PROMPT},
             {"role": "user", "content": memory}
         ]

         # Let OpenAI handle the pydantic parsing directly
         completion = openai_client.beta.chat.completions.parse(
-            model="gpt-4o-mini",
+            model=model,
             messages=messages,
             response_format=MemoryCategories,
             temperature=0
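
As a rough illustration (not part of the commit), this is the kind of stored `main` config record that the new `get_llm_config()` reads, and how a `parse_env_value`-style lookup resolves the `env:` reference; the exact record contents are assumptions:

```python
import os

# Hypothetical "main" config value as stored in the openmemory database.
stored_value = {
    "mem0": {
        "llm": {
            "provider": "openai",  # any OpenAI-compatible provider
            "config": {
                "model": "deepseek-ai/DeepSeek-R1",
                "api_key": "env:SILICONFLOW_API_KEY",
                "openai_base_url": "https://api.siliconflow.cn/v1",
            },
        }
    }
}

config = stored_value["mem0"]["llm"]["config"]

# Mirrors parse_env_value: "env:NAME" is replaced by the environment variable's value.
api_key = config["api_key"]
if api_key.startswith("env:"):
    api_key = os.getenv(api_key.split(":", 1)[1])

# Categorization now calls the configured model at the configured endpoint
# instead of the previously hardcoded gpt-4o-mini.
client_kwargs = {"api_key": api_key, "base_url": config["openai_base_url"]}
print(config["model"], client_kwargs["base_url"])
```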
