Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/giskard_hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .client import HubClient
from .data import Dataset, Model, Project
from .data.chat import ChatMessage
from .data.chat_test_case import ChatTestCase
from .data.conversation import Conversation

hub_url: str | None = None
Expand All @@ -11,6 +12,7 @@

__all__ = [
"Dataset",
"ChatTestCase",
"Conversation",
"ChatMessage",
"Project",
Expand Down
10 changes: 9 additions & 1 deletion src/giskard_hub/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@
from .data.dataset import Dataset
from .data.model import Model, ModelOutput
from .errors import HubConnectionError
from .resources.chat_test_cases import ChatTestCasesResource
from .resources.conversations import ConversationsResource
from .resources.datasets import DatasetsResource
from .resources.evaluations import EvaluationsResource
from .resources.models import ModelsResource
from .resources.projects import ProjectsResource


# pylint: disable=too-many-instance-attributes
# The `conversations` resource is deprecated and will be removed in the future.
class HubClient(SyncClient):
"""Client class to handle interaction with the hub.

Expand All @@ -29,6 +32,9 @@ class HubClient(SyncClient):
datasets : DatasetsResource
Resource to interact with datasets.

chat_test_cases : ChatTestCasesResource
Resource to interact with chat test cases.

conversations : ConversationsResource
Resource to interact with conversations.

Expand All @@ -44,6 +50,7 @@ class HubClient(SyncClient):

projects: ProjectsResource
datasets: DatasetsResource
chat_test_cases: ChatTestCasesResource
conversations: ConversationsResource
models: ModelsResource
evaluations: EvaluationsResource
Expand Down Expand Up @@ -120,6 +127,7 @@ def __init__(
# Define the resources
self.projects = ProjectsResource(self)
self.datasets = DatasetsResource(self)
self.chat_test_cases = ChatTestCasesResource(self)
self.conversations = ConversationsResource(self)
self.models = ModelsResource(self)
self.evaluations = EvaluationsResource(self)
Expand Down Expand Up @@ -149,7 +157,7 @@ def evaluate(
dataset : str | Dataset
ID of the dataset that will be used for the evaluation, or the dataset entity.
tags: List[str], optional
List of tags to filter the conversations that will be evaluated.
List of tags to filter the conversations (chat test cases) that will be evaluated.
model : str | Model | Callable[[List[ChatMessage]], ModelOutput | str]
ID of the model to evaluate, or a model entity, or a local model function.
A local model function is a function that takes a list of messages and returns a `ModelOutput` or a string.
Expand Down
2 changes: 2 additions & 0 deletions src/giskard_hub/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .chat import ChatMessage
from .chat_test_case import ChatTestCase
from .conversation import Conversation
from .dataset import Dataset
from .evaluation import EvaluationRun, Metric, ModelOutput
Expand All @@ -9,6 +10,7 @@
"Project",
"Dataset",
"Conversation",
"ChatTestCase",
"ChatMessage",
"Model",
"ModelOutput",
Expand Down
59 changes: 59 additions & 0 deletions src/giskard_hub/data/chat_test_case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from giskard_hub.data.check import CheckConfig, _format_checks_to_cli

from ._entity import Entity
from .chat import ChatMessage, ChatMessageWithMetadata


@dataclass
class ChatTestCase(Entity):
    """Dataset entry that describes a single chat test case.

    Attributes
    ----------
    messages : List[ChatMessage]
        Messages making up the chat test case. Defaults to an empty list.
    demo_output : Optional[ChatMessageWithMetadata], optional
        Output of the agent for demonstration purposes. Defaults to ``None``.
    tags : List[str], optional
        Tags attached to the chat test case. Defaults to an empty list.
    checks : List[CheckConfig], optional
        Checks to be performed on the chat test case. Defaults to an empty list.
    """

    messages: List[ChatMessage] = field(default_factory=list)
    demo_output: Optional[ChatMessageWithMetadata] = field(default=None)
    tags: List[str] = field(default_factory=list)
    checks: List[CheckConfig] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any], **kwargs) -> "ChatTestCase":
        """Build a chat test case from a raw dictionary.

        Parameters
        ----------
        data : Dict[str, Any]
            Raw payload. The ``messages``, ``demo_output`` and ``checks`` keys
            are converted before the payload is handed to the parent
            ``from_dict``; every other key is forwarded untouched.
        **kwargs
            Extra keyword arguments forwarded to the parent ``from_dict``.

        Returns
        -------
        ChatTestCase
            The object produced by the parent ``from_dict``.
        """
        # A missing or empty "messages" entry yields an empty list.
        raw_messages = data.get("messages")
        parsed_messages = (
            [ChatMessage.from_dict(raw) for raw in raw_messages]
            if raw_messages
            else []
        )

        # A missing or empty "demo_output" entry yields None.
        raw_demo_output = data.get("demo_output")
        parsed_demo_output = (
            ChatMessageWithMetadata.from_dict(raw_demo_output)
            if raw_demo_output
            else None
        )

        parsed_checks = _format_checks_to_cli(data.get("checks", []))

        # Work on a copy so the caller's dictionary is left unmodified.
        payload = dict(data)
        payload["messages"] = parsed_messages
        payload["demo_output"] = parsed_demo_output
        payload["checks"] = parsed_checks

        return super().from_dict(payload, **kwargs)
46 changes: 23 additions & 23 deletions src/giskard_hub/data/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,22 @@ class CheckConfig(BaseData):
enabled: bool = True


def _format_checks_to_cli(
checks: List[Union[TestCaseCheckConfig, Dict[str, Any]]],
) -> List[CheckConfig]:
def _format_checks_to_backend(
checks: List[Union[CheckConfig, Dict[str, Any]]],
) -> List[TestCaseCheckConfig]:
if not checks:
return []

checks = [check if isinstance(check, dict) else check.to_dict() for check in checks]

return [
CheckConfig.from_dict(
TestCaseCheckConfig.from_dict(
{
"identifier": check["identifier"],
"enabled": check["enabled"],
"enabled": True, # Default value for enabled
**check,
**(
{"params": params}
if check.get("assertions")
and (
params := {
k: v
for k, v in check["assertions"][0].items()
if k != "type"
}
)
{"assertions": [{"type": check["identifier"], **check["params"]}]}
if check.get("params")
else {}
),
}
Expand All @@ -51,22 +44,29 @@ def _format_checks_to_cli(
]


def _format_checks_to_backend(
checks: List[Union[CheckConfig, Dict[str, Any]]],
) -> List[TestCaseCheckConfig]:
def _format_checks_to_cli(
checks: List[Union[TestCaseCheckConfig, Dict[str, Any]]],
) -> List[CheckConfig]:
if not checks:
return []

checks = [check if isinstance(check, dict) else check.to_dict() for check in checks]

return [
TestCaseCheckConfig.from_dict(
CheckConfig.from_dict(
{
"enabled": True, # Default value for enabled
**check,
"identifier": check["identifier"],
"enabled": check["enabled"],
**(
{"assertions": [{"type": check["identifier"], **check["params"]}]}
if check.get("params")
{"params": params}
if check.get("assertions")
and (
params := {
k: v
for k, v in check["assertions"][0].items()
if k != "type"
}
)
else {}
),
}
Expand Down
44 changes: 9 additions & 35 deletions src/giskard_hub/data/conversation.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import warnings
from dataclasses import dataclass
from typing import Any, Dict

from ._entity import Entity
from .chat import ChatMessage, ChatMessageWithMetadata
from .check import CheckConfig, _format_checks_to_cli
from .chat_test_case import ChatTestCase


@dataclass
class Conversation(Entity):
class Conversation(ChatTestCase):
"""A Dataset entry representing a conversation.

Attributes
Expand All @@ -24,35 +23,10 @@ class Conversation(Entity):
List of checks to be performed on the conversation.
"""

messages: List[ChatMessage] = field(default_factory=list)
demo_output: Optional[ChatMessageWithMetadata] = field(default=None)
tags: List[str] = field(default_factory=list)
checks: List[CheckConfig] = field(default_factory=list)

@classmethod
def from_dict(cls, data: Dict[str, Any], **kwargs) -> "Conversation":
# Process messages
messages = []
if data.get("messages"):
messages = [ChatMessage.from_dict(msg) for msg in data["messages"]]

# Process demo_output
demo_output = None
if data.get("demo_output"):
demo_output = ChatMessageWithMetadata.from_dict(data["demo_output"])

# Process checks
checks = _format_checks_to_cli(data.get("checks", []))

# Create the object with processed data
obj = super().from_dict(
{
**data,
"messages": messages,
"demo_output": demo_output,
"checks": checks,
},
**kwargs,
warnings.warn(
"Conversation is deprecated and will be removed. Please use ChatTestCase instead.",
category=DeprecationWarning,
)

return obj
return super().from_dict(data, **kwargs)
28 changes: 28 additions & 0 deletions src/giskard_hub/data/dataset.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import warnings
from dataclasses import dataclass, field
from typing import List, Optional

from ._entity import Entity
from .chat_test_case import ChatTestCase
from .conversation import Conversation


Expand All @@ -19,12 +21,20 @@ class Dataset(Entity):
@property
def conversations(self):
    """Return the conversations of the dataset.

    Deprecated: emits a ``DeprecationWarning`` on every access.

    Returns
    -------
    The result of ``self._client.conversations.list(dataset_id=self.id)``
    when the dataset has both a client and an id, otherwise ``None``.
    """
    warnings.warn(
        "Conversation is deprecated and will be removed. Please use ChatTestCase operations instead.",
        category=DeprecationWarning,
        # Attribute the warning to the code that reads the property rather
        # than to this module; Python's default filters only display a
        # DeprecationWarning when it is attributed to `__main__`.
        stacklevel=2,
    )
    if self._client and self.id:
        return self._client.conversations.list(dataset_id=self.id)
    return None

def create_conversation(self, conversation: Conversation):
"""Add a conversation to the dataset."""
warnings.warn(
"Conversation is deprecated and will be removed. Please use ChatTestCase operations instead.",
category=DeprecationWarning,
)
if not self._client or not self.id:
raise ValueError(
"This dataset instance is detached or unsaved, cannot add conversation."
Expand All @@ -33,3 +43,21 @@ def create_conversation(self, conversation: Conversation):
return self._client.conversations.create(
dataset_id=self.id, **conversation.to_dict()
)

@property
def chat_test_cases(self):
    """Return the chat test cases of the dataset.

    Returns
    -------
    The result of ``self._client.chat_test_cases.list(dataset_id=self.id)``
    when the dataset has both a client and an id, otherwise ``None``.
    """
    # Without both a client and an id there is nothing to query.
    if not (self._client and self.id):
        return None
    return self._client.chat_test_cases.list(dataset_id=self.id)

def create_chat_test_case(self, chat_test_case: ChatTestCase):
    """Add a chat test case to the dataset.

    Parameters
    ----------
    chat_test_case : ChatTestCase
        Chat test case to add; its ``to_dict()`` output is expanded into
        keyword arguments of the create call.

    Returns
    -------
    The result of ``self._client.chat_test_cases.create(...)``.

    Raises
    ------
    ValueError
        If the dataset has no client or no id.
    """
    if self._client and self.id:
        resource = self._client.chat_test_cases
        return resource.create(dataset_id=self.id, **chat_test_case.to_dict())

    raise ValueError(
        "This dataset instance is detached or unsaved, cannot add chat test case."
    )
10 changes: 8 additions & 2 deletions src/giskard_hub/data/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from ._base import BaseData
from ._entity import Entity
from .chat_test_case import ChatTestCase
from .conversation import Conversation
from .dataset import Dataset
from .model import Model, ModelOutput
Expand Down Expand Up @@ -178,15 +179,20 @@ class EvaluationEntry(Entity):
"""Evaluation entry."""

run_id: str
conversation: Conversation
conversation: Conversation | ChatTestCase
model_output: ModelOutput | None = None
results: List[EvaluatorResult] = field(default_factory=list)
status: TaskStatus = TaskStatus.RUNNING

@classmethod
def from_dict(cls, data: Dict[str, Any], **kwargs) -> "EvaluationEntry":
data = dict(data)
data["conversation"] = Conversation.from_dict(data["conversation"])

if "chat_test_case" in data:
data["conversation"] = ChatTestCase.from_dict(data["chat_test_case"])
else:
data["conversation"] = Conversation.from_dict(data["conversation"])

output = data.get("output")
data["model_output"] = ModelOutput.from_dict(output) if output else None

Expand Down
2 changes: 2 additions & 0 deletions src/giskard_hub/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .chat_test_cases import ChatTestCasesResource
from .conversations import ConversationsResource
from .datasets import DatasetsResource
from .evaluations import EvaluationsResource
Expand All @@ -7,6 +8,7 @@
__all__ = [
"ProjectsResource",
"DatasetsResource",
"ChatTestCasesResource",
"ConversationsResource",
"ModelsResource",
"EvaluationsResource",
Expand Down
Loading