Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion server/reflector/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ def _configure_llamaindex(self):
"""Configure llamaindex Settings with OpenAILike LLM"""
session_id = llm_session_id.get() or f"fallback-{uuid4().hex}"

extra_body: dict = {"litellm_session_id": session_id}
# Only send enable_thinking when explicitly set (not None/unset).
# Models that don't support it will ignore the param.
if self.settings_obj.LLM_ENABLE_THINKING is not None:
extra_body["enable_thinking"] = self.settings_obj.LLM_ENABLE_THINKING

Settings.llm = OpenAILike(
model=self.model_name,
api_base=self.url,
Expand All @@ -215,7 +221,7 @@ def _configure_llamaindex(self):
is_function_calling_model=False,
temperature=self.temperature,
max_tokens=self.max_tokens,
additional_kwargs={"extra_body": {"litellm_session_id": session_id}},
additional_kwargs={"extra_body": extra_body},
)

async def get_response(
Expand Down
1 change: 1 addition & 0 deletions server/reflector/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class Settings(BaseSettings):
LLM_URL: str | None = None
LLM_API_KEY: str | None = None
LLM_CONTEXT_WINDOW: int = 16000
LLM_ENABLE_THINKING: bool | None = None

LLM_PARSE_MAX_RETRIES: int = (
3 # Max retries for JSON/validation errors (total attempts = retries + 1)
Expand Down
52 changes: 52 additions & 0 deletions server/tests/test_llm_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from workflows.errors import WorkflowRuntimeError, WorkflowTimeoutError

from reflector.llm import LLM, LLMParseError, StructuredOutputWorkflow
from reflector.settings import Settings
from reflector.utils.retry import RetryException


Expand All @@ -26,6 +27,57 @@ def make_completion_response(text: str):
return response


class TestLLMEnableThinking:
"""Test that LLM_ENABLE_THINKING setting is passed through to OpenAILike"""

def test_enable_thinking_false_passed_in_extra_body(self):
"""enable_thinking=False should be in extra_body when LLM_ENABLE_THINKING=False"""
settings = Settings(
LLM_ENABLE_THINKING=False,
LLM_URL="http://fake",
LLM_API_KEY="fake",
)

with (
patch("reflector.llm.OpenAILike") as mock_openai,
patch("reflector.llm.Settings"),
):
LLM(settings=settings)
extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
assert extra_body["enable_thinking"] is False

def test_enable_thinking_true_passed_in_extra_body(self):
"""enable_thinking=True should be in extra_body when LLM_ENABLE_THINKING=True"""
settings = Settings(
LLM_ENABLE_THINKING=True,
LLM_URL="http://fake",
LLM_API_KEY="fake",
)

with (
patch("reflector.llm.OpenAILike") as mock_openai,
patch("reflector.llm.Settings"),
):
LLM(settings=settings)
extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
assert extra_body["enable_thinking"] is True

def test_enable_thinking_none_not_in_extra_body(self):
"""enable_thinking should not be in extra_body when LLM_ENABLE_THINKING is None (default)"""
settings = Settings(
LLM_URL="http://fake",
LLM_API_KEY="fake",
)

with (
patch("reflector.llm.OpenAILike") as mock_openai,
patch("reflector.llm.Settings"),
):
LLM(settings=settings)
extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
assert "enable_thinking" not in extra_body


class TestLLMParseErrorRecovery:
"""Test parse error recovery with Workflow feedback loop"""

Expand Down
Loading