Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Client for interacting with the LangSmith API.

Use the client to customize API keys / workspace connections, SSL certs,
Expand Down Expand Up @@ -5938,13 +5938,13 @@
single_result: Union[ls_evaluator.EvaluationResult, dict],
) -> ls_evaluator.EvaluationResult:
if isinstance(single_result, dict):
return ls_evaluator.EvaluationResult(
**{
"key": fn_name,
"comment": single_result.get("reasoning"),
**single_result,
}
)
merged_result: dict[str, Any] = {**single_result}
if "reasoning" in merged_result and "comment" not in merged_result:
merged_result["comment"] = merged_result["reasoning"]
merged_result.pop("reasoning", None)
if fn_name is not None and merged_result.get("key") is None:
merged_result["key"] = fn_name
return ls_evaluator.EvaluationResult(**merged_result)
return single_result

def _is_eval_results(results: Any) -> TypeGuard[ls_evaluator.EvaluationResults]:
Expand Down
53 changes: 16 additions & 37 deletions python/langsmith/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

import asyncio
import inspect
import logging
import uuid
from abc import abstractmethod
from collections.abc import Awaitable, Sequence
from functools import wraps
from typing import (
Any,
Callable,
Expand All @@ -16,29 +18,11 @@
cast,
)

from pydantic import BaseModel, ConfigDict, Field, ValidationError, model_validator
from typing_extensions import TypedDict

from langsmith import run_helpers as rh
from langsmith import schemas

try:
from pydantic.v1 import ( # type: ignore[import]
BaseModel,
Field,
ValidationError,
validator,
)
except ImportError:
from pydantic import ( # type: ignore[assignment]
BaseModel,
Field,
ValidationError,
validator,
)

import logging
from functools import wraps

from langsmith.schemas import SCORE_TYPE, VALUE_TYPE, Example, Run

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,6 +61,8 @@ class EvaluationResult(BaseModel):
"""The numeric score for this evaluation."""
value: VALUE_TYPE = None
"""The value for this evaluation, if not numeric."""
metadata: Optional[dict] = None
"""Arbitrary metadata attached to the evaluation."""
comment: Optional[str] = None
"""An explanation regarding the evaluation."""
correction: Optional[dict] = None
Expand All @@ -95,24 +81,17 @@ class EvaluationResult(BaseModel):
extra: Optional[dict] = None
"""Metadata for the evaluator run."""

class Config:
"""Pydantic model configuration."""

allow_extra = False

@validator("value", pre=True)
def check_value_non_numeric(cls, v, values):
"""Check that the value is not numeric."""
# If a score isn't provided and the value is numeric
# it's more likely the user intended use the score field
if "score" not in values or values["score"] is None:
if isinstance(v, (int, float)):
logger.warning(
"Numeric values should be provided in"
" the 'score' field, not 'value'."
f" Got: {v}"
)
return v
model_config = ConfigDict(extra="forbid")

@model_validator(mode="after")
def check_value_non_numeric(self) -> EvaluationResult:
    """Emit a warning when a numeric payload lands in ``value`` instead of ``score``.

    Users who pass an ``int``/``float`` through ``value`` while leaving
    ``score`` unset almost certainly meant to use the ``score`` field; this
    post-validation hook flags that case without rejecting the model.
    """
    value_is_numeric = isinstance(self.value, (int, float))
    if value_is_numeric and self.score is None:
        logger.warning(
            "Numeric values should be provided in the 'score' field, not 'value'."
            f" Got: {self.value}"
        )
    return self


class EvaluationResults(TypedDict, total=False):
Expand Down
53 changes: 21 additions & 32 deletions python/langsmith/run_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,21 @@

from __future__ import annotations

import contextvars
import functools
import json
import logging
import sys
import threading
import urllib.parse
from collections.abc import Mapping, Sequence
from datetime import datetime, timezone
from typing import Any, Optional, Union, cast
from uuid import NAMESPACE_DNS, UUID, uuid4, uuid5

from pydantic import ConfigDict, Field, model_validator
from typing_extensions import TypedDict

try:
from pydantic.v1 import Field, root_validator # type: ignore[import]
except ImportError:
from pydantic import ( # type: ignore[assignment, no-redef]
Field,
root_validator,
)

import contextvars
import threading
import urllib.parse

import langsmith._internal._context as _context
from langsmith import schemas as ls_schemas
from langsmith import utils
Expand Down Expand Up @@ -195,7 +187,7 @@ class RunTree(ls_schemas.RunBase):
parent_dotted_order: Optional[str] = Field(default=None, exclude=True)
child_runs: list[RunTree] = Field(
default_factory=list,
exclude={"__all__": {"parent_run_id"}},
exclude=cast(Any, {"__all__": {"parent_run_id"}}),
)
session_name: str = Field(
default_factory=lambda: utils.get_tracer_project() or "default",
Expand All @@ -220,15 +212,14 @@ class RunTree(ls_schemas.RunBase):
description="Projects to replicate this run to with optional updates.",
)

class Config:
"""Pydantic model configuration."""

arbitrary_types_allowed = True
allow_population_by_field_name = True
extra = "ignore"
model_config = ConfigDict(
arbitrary_types_allowed=True,
populate_by_name=True,
extra="ignore",
)

@root_validator(pre=True)
def infer_defaults(cls, values: dict) -> dict:
@model_validator(mode="before")
def infer_defaults(cls, values: dict[str, Any]) -> dict[str, Any]:
"""Assign name to the run."""
if values.get("name") is None and values.get("serialized") is not None:
Comment on lines +221 to 224

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Decorate RunTree pre-validator as classmethod

The new infer_defaults validator is registered with @model_validator(mode="before") but isn’t a classmethod. Pydantic v2 invokes mode="before" validators on the class, so defining it as an instance method causes a TypeError when constructing a RunTree (infer_defaults() is missing the positional values argument) and prevents any run tree from being created. Add @classmethod (and keep cls as the first parameter) so the validator runs correctly.

Useful? React with 👍 / 👎.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind — the AI is wrong here; this is untrue.

if "name" in values["serialized"]:
Expand Down Expand Up @@ -268,21 +259,19 @@ def infer_defaults(cls, values: dict) -> dict:
values["replicas"] = _ensure_write_replicas(values["replicas"])
return values

@root_validator(pre=False)
def ensure_dotted_order(cls, values: dict) -> dict:
@model_validator(mode="after")
def ensure_dotted_order(self) -> RunTree:
"""Ensure the dotted order of the run."""
current_dotted_order = values.get("dotted_order")
current_dotted_order = self.dotted_order
if current_dotted_order and current_dotted_order.strip():
return values
current_dotted_order = _create_current_dotted_order(
values["start_time"], values["id"]
)
parent_dotted_order = values.get("parent_dotted_order")
return self
current_dotted_order = _create_current_dotted_order(self.start_time, self.id)
parent_dotted_order = self.parent_dotted_order
if parent_dotted_order is not None:
values["dotted_order"] = parent_dotted_order + "." + current_dotted_order
self.dotted_order = parent_dotted_order + "." + current_dotted_order
else:
values["dotted_order"] = current_dotted_order
return values
self.dotted_order = current_dotted_order
return self

@property
def client(self) -> Client:
Expand Down
95 changes: 33 additions & 62 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timedelta, timezone
from decimal import Decimal
from enum import Enum
from pathlib import Path
from typing import (
Any,
NamedTuple,
Expand All @@ -15,33 +16,19 @@
)
from uuid import UUID

from typing_extensions import NotRequired, TypedDict

try:
from pydantic.v1 import (
BaseModel,
Field, # type: ignore[import]
PrivateAttr,
StrictBool,
StrictFloat,
StrictInt,
)
except ImportError:
from pydantic import ( # type: ignore[assignment]
BaseModel,
Field,
PrivateAttr,
StrictBool,
StrictFloat,
StrictInt,
)

from pathlib import Path

from typing_extensions import Literal
from pydantic import (
BaseModel,
ConfigDict,
Field,
PrivateAttr,
StrictBool,
StrictFloat,
StrictInt,
)
from typing_extensions import Literal, NotRequired, TypedDict

SCORE_TYPE = Union[StrictBool, StrictInt, StrictFloat, None]
VALUE_TYPE = Union[dict, str, None]
VALUE_TYPE = Union[dict, str, StrictBool, StrictInt, StrictFloat, None]


class Attachment(NamedTuple):
Expand Down Expand Up @@ -96,11 +83,7 @@ class ExampleBase(BaseModel):
outputs: Optional[dict[str, Any]] = Field(default=None)
metadata: Optional[dict[str, Any]] = Field(default=None)

class Config:
"""Configuration class for the schema."""

frozen = True
arbitrary_types_allowed = True
model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)


class _AttachmentDict(TypedDict):
Expand Down Expand Up @@ -221,10 +204,7 @@ class ExampleUpdate(BaseModel):
attachments: Optional[Attachments] = None
attachments_operations: Optional[AttachmentsOperations] = None

class Config:
"""Configuration class for the schema."""

frozen = True
model_config = ConfigDict(frozen=True)

def __init__(self, **data):
"""Initialize from dict."""
Expand All @@ -249,10 +229,7 @@ class DatasetBase(BaseModel):
description: Optional[str] = None
data_type: Optional[DataType] = None

class Config:
"""Configuration class for the schema."""

frozen = True
model_config = ConfigDict(frozen=True)


DatasetTransformationType = Literal[
Expand Down Expand Up @@ -412,10 +389,7 @@ def __repr__(self):
"""Return a string representation of the RunBase object."""
return f"{self.__class__}(id={self.id}, name='{self.name}', run_type='{self.run_type}')"

class Config:
"""Configuration class for the schema."""

arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)


class Run(RunBase):
Expand Down Expand Up @@ -672,10 +646,7 @@ class FeedbackBase(BaseModel):
extra: Optional[dict] = None
"""The metadata of the feedback."""

class Config:
"""Configuration class for the schema."""

frozen = True
model_config = ConfigDict(frozen=True)


class FeedbackCategory(TypedDict, total=False):
Expand Down Expand Up @@ -783,35 +754,35 @@ class TracerSessionResult(TracerSession):
Sessions are also referred to as "Projects" in the UI.
"""

run_count: Optional[int]
run_count: Optional[int] = None
"""The number of runs in the project."""
latency_p50: Optional[timedelta]
latency_p50: Optional[timedelta] = None
"""The median (50th percentile) latency for the project."""
latency_p99: Optional[timedelta]
latency_p99: Optional[timedelta] = None
"""The 99th percentile latency for the project."""
total_tokens: Optional[int]
total_tokens: Optional[int] = None
"""The total number of tokens consumed in the project."""
prompt_tokens: Optional[int]
prompt_tokens: Optional[int] = None
"""The total number of prompt tokens consumed in the project."""
completion_tokens: Optional[int]
completion_tokens: Optional[int] = None
"""The total number of completion tokens consumed in the project."""
last_run_start_time: Optional[datetime]
last_run_start_time: Optional[datetime] = None
"""The start time of the last run in the project."""
feedback_stats: Optional[dict[str, Any]]
feedback_stats: Optional[dict[str, Any]] = None
"""Feedback stats for the project."""
run_facets: Optional[list[dict[str, Any]]]
run_facets: Optional[list[dict[str, Any]]] = None
"""Facets for the runs in the project."""
total_cost: Optional[Decimal]
total_cost: Optional[Decimal] = None
"""The total estimated LLM cost associated with the completion tokens."""
prompt_cost: Optional[Decimal]
prompt_cost: Optional[Decimal] = None
"""The estimated cost associated with the prompt (input) tokens."""
completion_cost: Optional[Decimal]
completion_cost: Optional[Decimal] = None
"""The estimated cost associated with the completion tokens."""
first_token_p50: Optional[timedelta]
first_token_p50: Optional[timedelta] = None
"""The median (50th percentile) time to process the first token."""
first_token_p99: Optional[timedelta]
first_token_p99: Optional[timedelta] = None
"""The 99th percentile time to process the first token."""
error_rate: Optional[float]
error_rate: Optional[float] = None
"""The error rate for the project."""


Expand Down Expand Up @@ -893,7 +864,7 @@ class LangSmithInfo(BaseModel):
instance_flags: Optional[dict[str, Any]] = None


Example.update_forward_refs()
Example.model_rebuild()


class LangSmithSettings(BaseModel):
Expand Down
Loading
Loading