Giskard-AI · henchaves · Jun 13, 2025 · May 20, 2025 · May 20, 2025 · May 20, 2025
diff --git a/src/giskard_hub/__init__.py b/src/giskard_hub/__init__.py
@@ -3,6 +3,7 @@
 from .client import HubClient
 from .data import Dataset, Model, Project
 from .data.chat import ChatMessage
+from .data.chat_test_case import ChatTestCase
 from .data.conversation import Conversation
 
 hub_url: str | None = None
@@ -11,6 +12,7 @@
 
 __all__ = [
     "Dataset",
+    "ChatTestCase",
     "Conversation",
     "ChatMessage",
     "Project",

diff --git a/src/giskard_hub/client.py b/src/giskard_hub/client.py
@@ -11,13 +11,16 @@
 from .data.dataset import Dataset
 from .data.model import Model, ModelOutput
 from .errors import HubConnectionError
+from .resources.chat_test_cases import ChatTestCasesResource
 from .resources.conversations import ConversationsResource
 from .resources.datasets import DatasetsResource
 from .resources.evaluations import EvaluationsResource
 from .resources.models import ModelsResource
 from .resources.projects import ProjectsResource
 
 
+# pylint: disable=too-many-instance-attributes
+# The `conversations` resource is deprecated and will be removed in the future.
 class HubClient(SyncClient):
     """Client class to handle interaction with the hub.
 
@@ -29,6 +32,9 @@ class HubClient(SyncClient):
     datasets : DatasetsResource
         Resource to interact with datasets.
 
+    chat_test_cases : ChatTestCasesResource
+        Resource to interact with chat test cases.
+
     conversations : ConversationsResource
         Resource to interact with conversations.
 
@@ -44,6 +50,7 @@ class HubClient(SyncClient):
 
     projects: ProjectsResource
     datasets: DatasetsResource
+    chat_test_cases: ChatTestCasesResource
     conversations: ConversationsResource
     models: ModelsResource
     evaluations: EvaluationsResource
@@ -120,6 +127,7 @@ def __init__(
         # Define the resources
         self.projects = ProjectsResource(self)
         self.datasets = DatasetsResource(self)
+        self.chat_test_cases = ChatTestCasesResource(self)
         self.conversations = ConversationsResource(self)
         self.models = ModelsResource(self)
         self.evaluations = EvaluationsResource(self)
@@ -149,7 +157,7 @@ def evaluate(
         dataset : str | Dataset
             ID of the dataset that will be used for the evaluation, or the dataset entity.
         tags: List[str], optional
-            List of tags to filter the conversations that will be evaluated.
+            List of tags to filter the conversations (chat test cases) that will be evaluated.
         model : str | Model | Callable[[List[ChatMessage]], ModelOutput | str]
             ID of the model to evaluate, or a model entity, or a local model function.
             A local model function is a function that takes a list of messages and returns a `ModelOutput` or a string.

diff --git a/src/giskard_hub/data/__init__.py b/src/giskard_hub/data/__init__.py
@@ -1,4 +1,5 @@
 from .chat import ChatMessage
+from .chat_test_case import ChatTestCase
 from .conversation import Conversation
 from .dataset import Dataset
 from .evaluation import EvaluationRun, Metric, ModelOutput
@@ -9,6 +10,7 @@
     "Project",
     "Dataset",
     "Conversation",
+    "ChatTestCase",
     "ChatMessage",
     "Model",
     "ModelOutput",

diff --git a/src/giskard_hub/data/chat_test_case.py b/src/giskard_hub/data/chat_test_case.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+from giskard_hub.data.check import CheckConfig, _format_checks_to_cli
+
+from ._entity import Entity
+from .chat import ChatMessage, ChatMessageWithMetadata
+
+
+@dataclass
+class ChatTestCase(Entity):
+    """Dataset entry that holds a single chat test case.
+
+    Attributes
+    ----------
+    messages : List[ChatMessage]
+        Messages of the chat test case; each one has a role and a content attribute.
+    demo_output : Optional[ChatMessageWithMetadata], optional
+        Agent output kept for demonstration purposes.
+    tags : List[str], optional
+        Tags attached to the chat test case.
+    checks : List[CheckConfig], optional
+        Checks to run against the chat test case.
+    """
+
+    messages: List[ChatMessage] = field(default_factory=list)
+    demo_output: Optional[ChatMessageWithMetadata] = field(default=None)
+    tags: List[str] = field(default_factory=list)
+    checks: List[CheckConfig] = field(default_factory=list)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any], **kwargs) -> "ChatTestCase":
+        """Build an instance from ``data``, converting nested entries first.
+
+        Falsy ``messages`` / ``demo_output`` values (missing, ``None``, empty)
+        become ``[]`` / ``None``; ``checks`` goes through
+        ``_format_checks_to_cli``. Remaining keys of ``data`` and ``kwargs``
+        are forwarded unchanged to the parent ``from_dict``.
+        """
+        raw_messages = data.get("messages")
+        raw_demo_output = data.get("demo_output")
+
+        # Converted values override the raw ones when merged over ``data``.
+        converted = {
+            "messages": (
+                [ChatMessage.from_dict(item) for item in raw_messages]
+                if raw_messages
+                else []
+            ),
+            "demo_output": (
+                ChatMessageWithMetadata.from_dict(raw_demo_output)
+                if raw_demo_output
+                else None
+            ),
+            "checks": _format_checks_to_cli(data.get("checks", [])),
+        }
+        return super().from_dict({**data, **converted}, **kwargs)
diff --git a/src/giskard_hub/data/check.py b/src/giskard_hub/data/check.py
@@ -20,29 +20,22 @@ class CheckConfig(BaseData):
     enabled: bool = True
 
 
-def _format_checks_to_cli(
-    checks: List[Union[TestCaseCheckConfig, Dict[str, Any]]],
-) -> List[CheckConfig]:
+def _format_checks_to_backend(
+    checks: List[Union[CheckConfig, Dict[str, Any]]],
+) -> List[TestCaseCheckConfig]:
     if not checks:
         return []
 
     checks = [check if isinstance(check, dict) else check.to_dict() for check in checks]
 
     return [
-        CheckConfig.from_dict(
+        TestCaseCheckConfig.from_dict(
             {
-                "identifier": check["identifier"],
-                "enabled": check["enabled"],
+                "enabled": True,  # Default value for enabled
+                **check,
                 **(
-                    {"params": params}
-                    if check.get("assertions")
-                    and (
-                        params := {
-                            k: v
-                            for k, v in check["assertions"][0].items()
-                            if k != "type"
-                        }
-                    )
+                    {"assertions": [{"type": check["identifier"], **check["params"]}]}
+                    if check.get("params")
                     else {}
                 ),
             }
@@ -51,22 +44,29 @@ def _format_checks_to_cli(
     ]
 
 
-def _format_checks_to_backend(
-    checks: List[Union[CheckConfig, Dict[str, Any]]],
-) -> List[TestCaseCheckConfig]:
+def _format_checks_to_cli(
+    checks: List[Union[TestCaseCheckConfig, Dict[str, Any]]],
+) -> List[CheckConfig]:
     if not checks:
         return []
 
     checks = [check if isinstance(check, dict) else check.to_dict() for check in checks]
 
     return [
-        TestCaseCheckConfig.from_dict(
+        CheckConfig.from_dict(
             {
-                "enabled": True,  # Default value for enabled
-                **check,
+                "identifier": check["identifier"],
+                "enabled": check["enabled"],
                 **(
-                    {"assertions": [{"type": check["identifier"], **check["params"]}]}
-                    if check.get("params")
+                    {"params": params}
+                    if check.get("assertions")
+                    and (
+                        params := {
+                            k: v
+                            for k, v in check["assertions"][0].items()
+                            if k != "type"
+                        }
+                    )
                     else {}
                 ),
             }

diff --git a/src/giskard_hub/data/conversation.py b/src/giskard_hub/data/conversation.py
@@ -1,15 +1,14 @@
 from __future__ import annotations
 
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+import warnings
+from dataclasses import dataclass
+from typing import Any, Dict
 
-from ._entity import Entity
-from .chat import ChatMessage, ChatMessageWithMetadata
-from .check import CheckConfig, _format_checks_to_cli
+from .chat_test_case import ChatTestCase
 
 
 @dataclass
-class Conversation(Entity):
+class Conversation(ChatTestCase):
     """A Dataset entry representing a conversation.
 
     Attributes
@@ -24,35 +23,10 @@ class Conversation(Entity):
         List of checks to be performed on the conversation.
     """
 
-    messages: List[ChatMessage] = field(default_factory=list)
-    demo_output: Optional[ChatMessageWithMetadata] = field(default=None)
-    tags: List[str] = field(default_factory=list)
-    checks: List[CheckConfig] = field(default_factory=list)
-
     @classmethod
     def from_dict(cls, data: Dict[str, Any], **kwargs) -> "Conversation":
-        # Process messages
-        messages = []
-        if data.get("messages"):
-            messages = [ChatMessage.from_dict(msg) for msg in data["messages"]]
-
-        # Process demo_output
-        demo_output = None
-        if data.get("demo_output"):
-            demo_output = ChatMessageWithMetadata.from_dict(data["demo_output"])
-
-        # Process checks
-        checks = _format_checks_to_cli(data.get("checks", []))
-
-        # Create the object with processed data
-        obj = super().from_dict(
-            {
-                **data,
-                "messages": messages,
-                "demo_output": demo_output,
-                "checks": checks,
-            },
-            **kwargs,
+        warnings.warn(
+            "Conversation is deprecated and will be removed. Please use ChatTestCase instead.",
+            category=DeprecationWarning, stacklevel=2,  # report the caller's line, not this module
         )
-
-        return obj
+        return super().from_dict(data, **kwargs)
diff --git a/src/giskard_hub/data/dataset.py b/src/giskard_hub/data/dataset.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import warnings
 from dataclasses import dataclass, field
 from typing import List, Optional
 
 from ._entity import Entity
+from .chat_test_case import ChatTestCase
 from .conversation import Conversation
 
 
@@ -19,12 +21,20 @@ class Dataset(Entity):
     @property
     def conversations(self):
         """Return the conversations of the dataset."""
+        warnings.warn(
+            "Conversation is deprecated and will be removed. Please use ChatTestCase operations instead.",
+            category=DeprecationWarning, stacklevel=2,  # point at the code reading the property
+        )
         if self._client and self.id:
             return self._client.conversations.list(dataset_id=self.id)
         return None
 
     def create_conversation(self, conversation: Conversation):
         """Add a conversation to the dataset."""
+        warnings.warn(
+            "Conversation is deprecated and will be removed. Please use ChatTestCase operations instead.",
+            category=DeprecationWarning, stacklevel=2,  # point at the caller of this method
+        )
         if not self._client or not self.id:
             raise ValueError(
                 "This dataset instance is detached or unsaved, cannot add conversation."
@@ -33,3 +43,21 @@ def create_conversation(self, conversation: Conversation):
         return self._client.conversations.create(
             dataset_id=self.id, **conversation.to_dict()
         )
+
+    @property
+    def chat_test_cases(self):
+        """List this dataset's chat test cases; ``None`` when it has no client or no id."""
+        if not (self._client and self.id):
+            return None
+        return self._client.chat_test_cases.list(dataset_id=self.id)
+
+    def create_chat_test_case(self, chat_test_case: ChatTestCase):
+        """Create ``chat_test_case`` in this dataset through the attached client.
+
+        Raises ``ValueError`` when the dataset has no client or no id.
+        """
+        if not (self._client and self.id):
+            detail = "This dataset instance is detached or unsaved, cannot add chat test case."
+            raise ValueError(detail)
+        payload = chat_test_case.to_dict()
+        return self._client.chat_test_cases.create(dataset_id=self.id, **payload)
diff --git a/src/giskard_hub/data/evaluation.py b/src/giskard_hub/data/evaluation.py
@@ -11,6 +11,7 @@
 
 from ._base import BaseData
 from ._entity import Entity
+from .chat_test_case import ChatTestCase
 from .conversation import Conversation
 from .dataset import Dataset
 from .model import Model, ModelOutput
@@ -178,15 +179,20 @@ class EvaluationEntry(Entity):
     """Evaluation entry."""
 
     run_id: str
-    conversation: Conversation
+    conversation: Conversation | ChatTestCase
     model_output: ModelOutput | None = None
     results: List[EvaluatorResult] = field(default_factory=list)
     status: TaskStatus = TaskStatus.RUNNING
 
     @classmethod
     def from_dict(cls, data: Dict[str, Any], **kwargs) -> "EvaluationEntry":
         data = dict(data)
-        data["conversation"] = Conversation.from_dict(data["conversation"])
+
+        if "chat_test_case" in data:
+            data["conversation"] = ChatTestCase.from_dict(data["chat_test_case"])
+        else:
+            data["conversation"] = Conversation.from_dict(data["conversation"])
+
         output = data.get("output")
         data["model_output"] = ModelOutput.from_dict(output) if output else None
 

diff --git a/src/giskard_hub/resources/__init__.py b/src/giskard_hub/resources/__init__.py
@@ -1,3 +1,4 @@
+from .chat_test_cases import ChatTestCasesResource
 from .conversations import ConversationsResource
 from .datasets import DatasetsResource
 from .evaluations import EvaluationsResource
@@ -7,6 +8,7 @@
 __all__ = [
     "ProjectsResource",
     "DatasetsResource",
+    "ChatTestCasesResource",
     "ConversationsResource",
     "ModelsResource",
     "EvaluationsResource",