
Commit ec2da37

fix(py): Inherit project name from parent for OpenAI agent wrapper (#2037)
CC @catherine-langchain
1 parent 390c4eb commit ec2da37
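
This fix matters when the agent runs inside an existing LangSmith run tree (for example under a traceable parent or during client.aevaluate): if the processor was created without an explicit project_name, its trace and span runs now inherit the parent run's session_name instead of falling back to the default project. Below is a minimal sketch of that scenario, not part of this commit; the agent name, question, and "my-eval-project" are placeholder values.

# Minimal sketch of the behavior this commit fixes. The agent name, question,
# and "my-eval-project" are placeholders, not taken from the diff.
import asyncio

from agents import Agent, Runner, set_trace_processors

from langsmith import traceable
from langsmith.wrappers import OpenAIAgentsTracingProcessor

# No explicit project_name: the processor should inherit one from the parent run.
set_trace_processors([OpenAIAgentsTracingProcessor()])

agent = Agent(name="Support Agent", instructions="Answer briefly.")


@traceable(project_name="my-eval-project")  # hypothetical parent project
async def ask(question: str) -> str:
    result = await Runner.run(agent, question)
    return result.final_output


asyncio.run(ask("What does HTTP 404 mean?"))
# Previously the "Agent workflow" run and its child spans went to the
# processor's default project; with this change they follow the parent run's
# session_name ("my-eval-project"), alongside the traceable parent run.
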

5 files changed (+96, −7 lines):

  python/langsmith/__init__.py
  python/langsmith/wrappers/_openai_agents.py
  python/pyproject.toml
  python/tests/integration_tests/wrappers/test_openai_agents.py
  python/uv.lock

python/langsmith/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@

 # Avoid calling into importlib on every call to __version__

-__version__ = "0.4.30"
+__version__ = "0.4.31"
 version = __version__  # for backwards compatibility


python/langsmith/wrappers/_openai_agents.py
Lines changed: 9 additions & 4 deletions

@@ -96,6 +96,7 @@ class RunData(TypedDict):
     start_time: datetime
     dotted_order: str
     parent_run_id: Optional[str]
+    project_name: Optional[str]

 class OpenAIAgentsTracingProcessor(tracing.TracingProcessor):  # type: ignore[no-redef]
     """Tracing processor for the `OpenAI Agents SDK <https://openai.github.io/openai-agents-python/>`_.
@@ -187,10 +188,12 @@ def on_trace_start(self, trace: tracing.Trace) -> None:
         start_time = datetime.now(timezone.utc)

         # Use LangSmith parent run tree if available, else create new trace
+        project_name = self._project_name
         if current_run_tree is not None:
             trace_id = str(current_run_tree.trace_id)
             parent_run_id = str(current_run_tree.id)
             parent_dotted_order = current_run_tree.dotted_order
+            project_name = self._project_name or current_run_tree.session_name
         else:
             trace_id = trace_run_id
             parent_run_id = None
@@ -207,6 +210,7 @@ def on_trace_start(self, trace: tracing.Trace) -> None:
             start_time=start_time,
             dotted_order=dotted_order,
             parent_run_id=parent_run_id,
+            project_name=project_name,
         )

         run_extra = {"metadata": self._metadata or {}}
@@ -228,7 +232,7 @@ def on_trace_start(self, trace: tracing.Trace) -> None:
             revision_id=None,
             extra=run_extra,
             tags=self._tags,
-            project_name=self._project_name,
+            project_name=project_name,
         )

         self.client.create_run(**run_data)
@@ -250,7 +254,7 @@ def on_trace_end(self, trace: tracing.Trace) -> None:
                 inputs=self._first_response_inputs.pop(trace.trace_id, {}),
                 outputs=self._last_response_outputs.pop(trace.trace_id, {}),
                 extra={"metadata": metadata},
-                project_name=self._project_name,
+                project_name=run["project_name"],
             )
         except Exception as e:
             logger.exception(f"Error updating trace run: {e}")
@@ -287,6 +291,7 @@ def on_span_start(self, span: tracing.Span) -> None:
             start_time=span_start_time,
             dotted_order=dotted_order,
             parent_run_id=parent_run["id"],
+            project_name=parent_run["project_name"],
         )

         run_name = agent_utils.get_run_name(span)
@@ -302,7 +307,7 @@ def on_span_start(self, span: tracing.Span) -> None:
             parent_run_id=parent_run["id"],
             dotted_order=dotted_order,
             inputs=extracted.get("inputs", {}),
-            project_name=self._project_name,
+            project_name=parent_run["project_name"],
         )
         if span.started_at:
             run_data["start_time"] = datetime.fromisoformat(span.started_at)
@@ -330,7 +335,7 @@ def on_span_end(self, span: tracing.Span) -> None:
             outputs=outputs,
             inputs=inputs,
             extra=extracted,
-            project_name=self._project_name,
+            project_name=run["project_name"],
         )
         if span.ended_at:
             run_data["end_time"] = datetime.fromisoformat(span.ended_at)
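
An explicitly configured project still wins: the new assignment is self._project_name or current_run_tree.session_name, so inheritance only applies when the processor was created without a project_name, and child spans then reuse whatever project was stored on their parent run. The following standalone sketch only illustrates that precedence rule; the function name and values are illustrative, not part of the library.

# Standalone illustration of the precedence rule introduced above; the
# function and values are placeholders, not taken from the library.
from typing import Optional


def resolve_project_name(
    configured: Optional[str], parent_session_name: Optional[str]
) -> Optional[str]:
    # Mirrors `self._project_name or current_run_tree.session_name`:
    # an explicit project wins, otherwise fall back to the parent's project.
    return configured or parent_session_name


assert resolve_project_name("agents-prod", "my-eval-project") == "agents-prod"
assert resolve_project_name(None, "my-eval-project") == "my-eval-project"
assert resolve_project_name(None, None) is None
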

python/pyproject.toml
Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "langsmith"
-version = "0.4.30"
+version = "0.4.31"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = [
     {name = "LangChain", email = "[email protected]"},

python/tests/integration_tests/wrappers/test_openai_agents.py
Lines changed: 84 additions & 0 deletions

@@ -1,12 +1,14 @@
 import asyncio
 import json
 from unittest import mock
+from uuid import uuid4

 import pytest
 from agents import Agent, Runner, set_trace_processors

 import langsmith
 from langsmith.wrappers import OpenAIAgentsTracingProcessor
+from tests.integration_tests.test_client import safe_delete_dataset


 def _collect_trace_requests(mock_session: mock.MagicMock):
@@ -78,3 +80,85 @@ async def test_openai_agents_tracing_processor():
         event for event in all_events if event.get("name") == "Agent workflow"
     ]
     assert len(agent_runs) > 0, "No agent workflow runs found in trace"
+
+
+@pytest.mark.xfail(reason="Flaky test - may fail intermittently")
+async def test_openai_agents_with_evaluate():
+    client = langsmith.Client()
+
+    processor = OpenAIAgentsTracingProcessor(client=client)
+    set_trace_processors([processor])
+
+    agent = Agent(
+        name="Captain Obvious",
+        instructions="You are Captain Obvious, the world's"
+        + " most literal technical support agent.",
+    )
+
+    question = (
+        "Why is my code failing when I try to divide by zero?"
+        " I keep getting this error message."
+    )
+
+    # Example: Cat image validation
+    question = "What type of cat is shown in this image?"
+    provided_answer = "This is a tuxedo cat with black and white fur pattern."
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b7/George%2C_a_perfect_example_of_a_tuxedo_cat.jpg/1250px-George%2C_a_perfect_example_of_a_tuxedo_cat.jpg"
+
+    dataset_name = "OpenAI Agent Testing" + str(uuid4().hex[:4])
+
+    if client.has_dataset(dataset_name=dataset_name):
+        safe_delete_dataset(client, dataset_name=dataset_name)
+    try:
+        dataset = client.create_dataset(dataset_name=dataset_name)
+        client.create_examples(
+            inputs=[
+                {
+                    "question": question,
+                    "answer": provided_answer,
+                    "image_url": image_url,
+                }
+            ],
+            outputs=[{"is_correct": True}],
+            dataset_id=dataset.id,
+        )
+
+        async def run_agent(inputs: dict):
+            """Run agent and track the final response."""
+            await Runner.run(agent, question)
+            return {"result": "foo"}
+
+        async def alignment(outputs: dict, reference_outputs: dict) -> bool:
+            """Check if the agent chose the correct route."""
+            return True
+
+        experiment = await client.aevaluate(
+            run_agent,
+            data=dataset_name,
+            evaluators=[alignment],
+            experiment_prefix="agent-gpt-5-mini",
+            max_concurrency=2,
+            blocking=True,
+        )
+        await asyncio.sleep(5)
+        experiment_results = client.get_experiment_results(
+            name=experiment.experiment_name
+        )
+        assert experiment_results["stats"].run_count == 1
+        assert (
+            experiment_results["stats"].feedback_stats.get("alignment", {}).get("n")
+            == 1
+        )
+        assert (
+            experiment_results["stats"].feedback_stats.get("alignment", {}).get("avg")
+            == 1
+        )
+        examples = list(experiment_results["examples_with_runs"])
+        assert len(examples) == 1
+        run = client.read_run(examples[0].runs[0].id, load_child_runs=True)
+        assert len(run.child_runs) == 1
+        assert run.child_runs[0].name == "Agent workflow"
+        assert len(run.child_runs[0].child_runs) == 1
+        assert run.child_runs[0].child_runs[0].name == "Captain Obvious"
+    finally:
+        safe_delete_dataset(client, dataset_name=dataset_name)

python/uv.lock
Lines changed: 1 addition & 1 deletion
(Generated file; diff not rendered.)
