Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Client for interacting with the LangSmith API.

Use the client to customize API keys / workspace connections, SSL certs,
Expand Down Expand Up @@ -5938,13 +5938,13 @@
single_result: Union[ls_evaluator.EvaluationResult, dict],
) -> ls_evaluator.EvaluationResult:
if isinstance(single_result, dict):
return ls_evaluator.EvaluationResult(
**{
"key": fn_name,
"comment": single_result.get("reasoning"),
**single_result,
}
)
merged_result: dict[str, Any] = {**single_result}
if "reasoning" in merged_result and "comment" not in merged_result:
merged_result["comment"] = merged_result["reasoning"]
merged_result.pop("reasoning", None)
if fn_name is not None and merged_result.get("key") is None:
merged_result["key"] = fn_name
return ls_evaluator.EvaluationResult(**merged_result)
return single_result

def _is_eval_results(results: Any) -> TypeGuard[ls_evaluator.EvaluationResults]:
Expand Down
53 changes: 16 additions & 37 deletions python/langsmith/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

import asyncio
import inspect
import logging
import uuid
from abc import abstractmethod
from collections.abc import Awaitable, Sequence
from functools import wraps
from typing import (
Any,
Callable,
Expand All @@ -16,29 +18,11 @@
cast,
)

from pydantic import BaseModel, ConfigDict, Field, ValidationError, model_validator
from typing_extensions import TypedDict

from langsmith import run_helpers as rh
from langsmith import schemas

try:
from pydantic.v1 import ( # type: ignore[import]
BaseModel,
Field,
ValidationError,
validator,
)
except ImportError:
from pydantic import ( # type: ignore[assignment]
BaseModel,
Field,
ValidationError,
validator,
)

import logging
from functools import wraps

from langsmith.schemas import SCORE_TYPE, VALUE_TYPE, Example, Run

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,6 +61,8 @@ class EvaluationResult(BaseModel):
"""The numeric score for this evaluation."""
value: VALUE_TYPE = None
"""The value for this evaluation, if not numeric."""
metadata: Optional[dict] = None
"""Arbitrary metadata attached to the evaluation."""
comment: Optional[str] = None
"""An explanation regarding the evaluation."""
correction: Optional[dict] = None
Expand All @@ -95,24 +81,17 @@ class EvaluationResult(BaseModel):
extra: Optional[dict] = None
"""Metadata for the evaluator run."""

class Config:
"""Pydantic model configuration."""

allow_extra = False

@validator("value", pre=True)
def check_value_non_numeric(cls, v, values):
"""Check that the value is not numeric."""
# If a score isn't provided and the value is numeric
# it's more likely the user intended use the score field
if "score" not in values or values["score"] is None:
if isinstance(v, (int, float)):
logger.warning(
"Numeric values should be provided in"
" the 'score' field, not 'value'."
f" Got: {v}"
)
return v
model_config = ConfigDict(extra="forbid")

@model_validator(mode="after")
def check_value_non_numeric(self) -> EvaluationResult:
    """Emit a warning when a numeric payload lands in ``value`` instead of ``score``.

    Users who pass an ``int``/``float`` through ``value`` while leaving
    ``score`` unset almost certainly meant to use the ``score`` field; this
    post-validation hook flags that case without rejecting the model.
    """
    value_is_numeric = isinstance(self.value, (int, float))
    if value_is_numeric and self.score is None:
        logger.warning(
            "Numeric values should be provided in the 'score' field, not 'value'."
            f" Got: {self.value}"
        )
    return self


class EvaluationResults(TypedDict, total=False):
Expand Down
53 changes: 21 additions & 32 deletions python/langsmith/run_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,21 @@

from __future__ import annotations

import contextvars
import functools
import json
import logging
import sys
import threading
import urllib.parse
from collections.abc import Mapping, Sequence
from datetime import datetime, timezone
from typing import Any, Optional, Union, cast
from uuid import NAMESPACE_DNS, UUID, uuid4, uuid5

from pydantic import ConfigDict, Field, model_validator
from typing_extensions import TypedDict

try:
from pydantic.v1 import Field, root_validator # type: ignore[import]
except ImportError:
from pydantic import ( # type: ignore[assignment, no-redef]
Field,
root_validator,
)

import contextvars
import threading
import urllib.parse

import langsmith._internal._context as _context
from langsmith import schemas as ls_schemas
from langsmith import utils
Expand Down Expand Up @@ -195,7 +187,7 @@ class RunTree(ls_schemas.RunBase):
parent_dotted_order: Optional[str] = Field(default=None, exclude=True)
child_runs: list[RunTree] = Field(
default_factory=list,
exclude={"__all__": {"parent_run_id"}},
exclude=cast(Any, {"__all__": {"parent_run_id"}}),
)
session_name: str = Field(
default_factory=lambda: utils.get_tracer_project() or "default",
Expand All @@ -220,15 +212,14 @@ class RunTree(ls_schemas.RunBase):
description="Projects to replicate this run to with optional updates.",
)

class Config:
"""Pydantic model configuration."""

arbitrary_types_allowed = True
allow_population_by_field_name = True
extra = "ignore"
model_config = ConfigDict(
arbitrary_types_allowed=True,
populate_by_name=True,
extra="ignore",
)

@root_validator(pre=True)
def infer_defaults(cls, values: dict) -> dict:
@model_validator(mode="before")
def infer_defaults(cls, values: dict[str, Any]) -> dict[str, Any]:
"""Assign name to the run."""
if values.get("name") is None and values.get("serialized") is not None:
Comment on lines +221 to 224

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Decorate RunTree pre-validator as classmethod

The new infer_defaults validator is registered with @model_validator(mode="before") but isn’t a classmethod. Pydantic v2 invokes mode="before" validators on the class, so defining it as an instance method causes a TypeError when constructing a RunTree (infer_defaults() is missing the positional values argument) and prevents any run tree from being created. Add @classmethod (and keep cls as the first parameter) so the validator runs correctly.

Useful? React with 👍 / 👎.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind — the AI is wrong here; this is untrue.

if "name" in values["serialized"]:
Expand Down Expand Up @@ -268,21 +259,19 @@ def infer_defaults(cls, values: dict) -> dict:
values["replicas"] = _ensure_write_replicas(values["replicas"])
return values

@root_validator(pre=False)
def ensure_dotted_order(cls, values: dict) -> dict:
@model_validator(mode="after")
def ensure_dotted_order(self) -> RunTree:
"""Ensure the dotted order of the run."""
current_dotted_order = values.get("dotted_order")
current_dotted_order = self.dotted_order
if current_dotted_order and current_dotted_order.strip():
return values
current_dotted_order = _create_current_dotted_order(
values["start_time"], values["id"]
)
parent_dotted_order = values.get("parent_dotted_order")
return self
current_dotted_order = _create_current_dotted_order(self.start_time, self.id)
parent_dotted_order = self.parent_dotted_order
if parent_dotted_order is not None:
values["dotted_order"] = parent_dotted_order + "." + current_dotted_order
self.dotted_order = parent_dotted_order + "." + current_dotted_order
else:
values["dotted_order"] = current_dotted_order
return values
self.dotted_order = current_dotted_order
return self

@property
def client(self) -> Client:
Expand Down
95 changes: 33 additions & 62 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timedelta, timezone
from decimal import Decimal
from enum import Enum
from pathlib import Path
from typing import (
Any,
NamedTuple,
Expand All @@ -15,33 +16,19 @@
)
from uuid import UUID

from typing_extensions import NotRequired, TypedDict

try:
from pydantic.v1 import (
BaseModel,
Field, # type: ignore[import]
PrivateAttr,
StrictBool,
StrictFloat,
StrictInt,
)
except ImportError:
from pydantic import ( # type: ignore[assignment]
BaseModel,
Field,
PrivateAttr,
StrictBool,
StrictFloat,
StrictInt,
)

from pathlib import Path

from typing_extensions import Literal
from pydantic import (
BaseModel,
ConfigDict,
Field,
PrivateAttr,
StrictBool,
StrictFloat,
StrictInt,
)
from typing_extensions import Literal, NotRequired, TypedDict

SCORE_TYPE = Union[StrictBool, StrictInt, StrictFloat, None]
VALUE_TYPE = Union[dict, str, None]
VALUE_TYPE = Union[dict, str, StrictBool, StrictInt, StrictFloat, None]


class Attachment(NamedTuple):
Expand Down Expand Up @@ -96,11 +83,7 @@ class ExampleBase(BaseModel):
outputs: Optional[dict[str, Any]] = Field(default=None)
metadata: Optional[dict[str, Any]] = Field(default=None)

class Config:
"""Configuration class for the schema."""

frozen = True
arbitrary_types_allowed = True
model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)


class _AttachmentDict(TypedDict):
Expand Down Expand Up @@ -221,10 +204,7 @@ class ExampleUpdate(BaseModel):
attachments: Optional[Attachments] = None
attachments_operations: Optional[AttachmentsOperations] = None

class Config:
"""Configuration class for the schema."""

frozen = True
model_config = ConfigDict(frozen=True)

def __init__(self, **data):
"""Initialize from dict."""
Expand All @@ -249,10 +229,7 @@ class DatasetBase(BaseModel):
description: Optional[str] = None
data_type: Optional[DataType] = None

class Config:
"""Configuration class for the schema."""

frozen = True
model_config = ConfigDict(frozen=True)


DatasetTransformationType = Literal[
Expand Down Expand Up @@ -412,10 +389,7 @@ def __repr__(self):
"""Return a string representation of the RunBase object."""
return f"{self.__class__}(id={self.id}, name='{self.name}', run_type='{self.run_type}')"

class Config:
"""Configuration class for the schema."""

arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)


class Run(RunBase):
Expand Down Expand Up @@ -672,10 +646,7 @@ class FeedbackBase(BaseModel):
extra: Optional[dict] = None
"""The metadata of the feedback."""

class Config:
"""Configuration class for the schema."""

frozen = True
model_config = ConfigDict(frozen=True)


class FeedbackCategory(TypedDict, total=False):
Expand Down Expand Up @@ -783,35 +754,35 @@ class TracerSessionResult(TracerSession):
Sessions are also referred to as "Projects" in the UI.
"""

run_count: Optional[int]
run_count: Optional[int] = None
"""The number of runs in the project."""
latency_p50: Optional[timedelta]
latency_p50: Optional[timedelta] = None
"""The median (50th percentile) latency for the project."""
latency_p99: Optional[timedelta]
latency_p99: Optional[timedelta] = None
"""The 99th percentile latency for the project."""
total_tokens: Optional[int]
total_tokens: Optional[int] = None
"""The total number of tokens consumed in the project."""
prompt_tokens: Optional[int]
prompt_tokens: Optional[int] = None
"""The total number of prompt tokens consumed in the project."""
completion_tokens: Optional[int]
completion_tokens: Optional[int] = None
"""The total number of completion tokens consumed in the project."""
last_run_start_time: Optional[datetime]
last_run_start_time: Optional[datetime] = None
"""The start time of the last run in the project."""
feedback_stats: Optional[dict[str, Any]]
feedback_stats: Optional[dict[str, Any]] = None
"""Feedback stats for the project."""
run_facets: Optional[list[dict[str, Any]]]
run_facets: Optional[list[dict[str, Any]]] = None
"""Facets for the runs in the project."""
total_cost: Optional[Decimal]
total_cost: Optional[Decimal] = None
"""The total estimated LLM cost associated with the completion tokens."""
prompt_cost: Optional[Decimal]
prompt_cost: Optional[Decimal] = None
"""The estimated cost associated with the prompt (input) tokens."""
completion_cost: Optional[Decimal]
completion_cost: Optional[Decimal] = None
"""The estimated cost associated with the completion tokens."""
first_token_p50: Optional[timedelta]
first_token_p50: Optional[timedelta] = None
"""The median (50th percentile) time to process the first token."""
first_token_p99: Optional[timedelta]
first_token_p99: Optional[timedelta] = None
"""The 99th percentile time to process the first token."""
error_rate: Optional[float]
error_rate: Optional[float] = None
"""The error rate for the project."""


Expand Down Expand Up @@ -893,7 +864,7 @@ class LangSmithInfo(BaseModel):
instance_flags: Optional[dict[str, Any]] = None


Example.update_forward_refs()
Example.model_rebuild()


class LangSmithSettings(BaseModel):
Expand Down
Loading
Loading