diff --git a/changelog/4018.fixed.md b/changelog/4018.fixed.md new file mode 100644 index 0000000000..d7be87dcee --- /dev/null +++ b/changelog/4018.fixed.md @@ -0,0 +1 @@ +- Fixed LLM tracing model attribution so `gen_ai.request.model` stays pinned to the configured request model and `gen_ai.response.model` is populated from provider response data when it arrives, including OpenAI-compatible streaming integrations. diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index eb8ce3cc66..6472200033 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -45,6 +45,10 @@ from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN from pipecat.services.settings import LLMSettings, _NotGiven from pipecat.utils.tracing.service_decorators import traced_llm +from pipecat.utils.tracing.setup import is_tracing_available + +if is_tracing_available(): + from opentelemetry import trace @dataclass @@ -257,6 +261,11 @@ def set_full_model_name(self, full_model_name: str): """ self._full_model_name = full_model_name + if is_tracing_available() and "trace" in globals(): + current_span = trace.get_current_span() + if current_span.is_recording(): + current_span.set_attribute("gen_ai.response.model", full_model_name) + def get_full_model_name(self): """Get the current full model name. 
diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 710a22db21..7446fc1f3f 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -188,6 +188,9 @@ async def _process_context( ) await self.start_llm_usage_metrics(tokens) + if chunk.model and self.get_full_model_name() != chunk.model: + self.set_full_model_name(chunk.model) + if chunk.choices is None or len(chunk.choices) == 0: continue diff --git a/src/pipecat/utils/tracing/service_attributes.py b/src/pipecat/utils/tracing/service_attributes.py index 21a22341f2..22198bd509 100644 --- a/src/pipecat/utils/tracing/service_attributes.py +++ b/src/pipecat/utils/tracing/service_attributes.py @@ -187,6 +187,7 @@ def add_llm_span_attributes( span: "Span", service_name: str, model: str, + response_model: Optional[str] = None, stream: bool = True, messages: Optional[str] = None, output: Optional[str] = None, @@ -204,7 +205,8 @@ def add_llm_span_attributes( Args: span: The span to add attributes to. service_name: Name of the LLM service (e.g., "openai"). - model: Model name/identifier. + model: Requested model name/identifier. + response_model: Actual model name returned by the provider. stream: Whether streaming is enabled. messages: JSON-serialized messages. output: Aggregated output text from the LLM. 
@@ -220,6 +222,8 @@ def add_llm_span_attributes( # Add standard attributes span.set_attribute("gen_ai.system", _get_gen_ai_system_from_service_name(service_name)) span.set_attribute("gen_ai.request.model", model) + if response_model: + span.set_attribute("gen_ai.response.model", response_model) span.set_attribute("gen_ai.operation.name", "chat") span.set_attribute("gen_ai.output.type", "text") span.set_attribute("stream", stream) diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index f4764d2485..b561aff51c 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -52,7 +52,6 @@ def _get_model_name(service) -> str: """ return ( getattr(getattr(service, "_settings", None), "model", None) - or getattr(service, "_full_model_name", None) or getattr(service, "model_name", None) or getattr(service, "_model_name", None) or "unknown" @@ -525,6 +524,10 @@ async def wrapped_start_llm_usage_metrics(tokens): "parameters": params, } + full_model_name = getattr(self, "_full_model_name", None) + if full_model_name: + attribute_kwargs["response_model"] = full_model_name + # Add optional attributes only if they exist if serialized_messages: attribute_kwargs["messages"] = serialized_messages diff --git a/tests/test_llm_tracing_models.py b/tests/test_llm_tracing_models.py new file mode 100644 index 0000000000..fa19f3b7ab --- /dev/null +++ b/tests/test_llm_tracing_models.py @@ -0,0 +1,90 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Unit tests for LLM tracing model attributes.""" + +import importlib.util +from pathlib import Path +from types import SimpleNamespace +from typing import cast + +from pipecat.utils.tracing.service_attributes import add_llm_span_attributes +from pipecat.utils.tracing.service_decorators import _get_model_name + + +class _FakeSpan: + def __init__(self, recording: bool = True): + 
self.attributes = {} + self._recording = recording + + def set_attribute(self, key, value): + self.attributes[key] = value + + def is_recording(self): + return self._recording + + +class _ServiceWithModels: + def __init__(self): + self._settings = SimpleNamespace(model="requested-model") + self._full_model_name = "response-model" + + +def _load_base_llm_module(): + module_path = Path(__file__).resolve().parents[1] / "src/pipecat/services/openai/base_llm.py" + spec = importlib.util.spec_from_file_location("test_base_llm_module", module_path) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_get_model_name_prefers_requested_model_over_response_model(): + """Request model should not drift to a previously observed response model.""" + service = _ServiceWithModels() + + model_name = _get_model_name(service) + + assert model_name == "requested-model" + + +def test_add_llm_span_attributes_sets_response_model_when_provided(): + """LLM span attributes should include request and response model names.""" + span = _FakeSpan() + + add_llm_span_attributes( + span=span, + service_name="OpenAILLMService", + model="requested-model", + response_model="resolved-model-2026-01-01", + ) + + assert span.attributes["gen_ai.request.model"] == "requested-model" + assert span.attributes["gen_ai.response.model"] == "resolved-model-2026-01-01" + + +def test_set_full_model_name_updates_active_span_response_model(): + """Response model should be written when response data is received.""" + base_llm = _load_base_llm_module() + fake_span = _FakeSpan(recording=True) + fake_trace = SimpleNamespace(get_current_span=lambda: fake_span) + fake_service = SimpleNamespace(_full_model_name="") + + original_is_tracing_available = base_llm.is_tracing_available + original_trace = getattr(base_llm, "trace", None) + try: + setattr(base_llm, "is_tracing_available", lambda: True) + setattr(base_llm, 
"trace", fake_trace) + base_llm.BaseOpenAILLMService.set_full_model_name(fake_service, "gpt-4o-2026-02-14") + finally: + setattr(base_llm, "is_tracing_available", original_is_tracing_available) + if original_trace is None: + delattr(base_llm, "trace") + else: + setattr(base_llm, "trace", cast(object, original_trace)) + + assert fake_service._full_model_name == "gpt-4o-2026-02-14" + assert fake_span.attributes["gen_ai.response.model"] == "gpt-4o-2026-02-14"