Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions script-docs/hub/ui/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ The interface below displays the knowledge base details that need to be filled o
- ``File``: The document to upload, containing the knowledge base content. Supported formats are:
- **JSON**: A JSON file containing an array of objects
- **JSONL**: A JSON Lines file with one object per line
- **CSV**: A CSV file (deprecated)


**JSON/JSONL format requirements:**
Expand All @@ -197,13 +196,6 @@ Each object in your JSON or JSONL file should have the following structure:
- ``text`` (required): The document content
- ``topic`` (optional): The topic classification for the document

**CSV format requirements:**

.. warning::
**Importing a knowledge base from a CSV file is deprecated** and will be removed in a future version. Please use the JSON or JSONL format for new knowledge bases.

A CSV file should have one column named ``text`` containing the document content. If you're uploading a knowledge base with pre-defined topics, the file should have two columns, with the first row labeled ``text,topic``.

**General rules for all formats:**
- If the ``text`` has a value but the ``topic`` is blank, the ``topic`` will be set to 'Others'. However, if all topics are blank, the topics will be generated automatically.
- If both the ``text`` and ``topic`` are blank, or if the ``text`` is blank but the ``topic`` has a value, the entry will not be imported.
Expand Down
16 changes: 8 additions & 8 deletions src/giskard_hub/resources/knowledge_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def create( # pylint: disable=too-many-arguments
name: str,
data: Union[str, List[dict[str, str]]],
description: Union[str, None] = None,
document_column: Union[str, NotGiven] = NOT_GIVEN,
topic_column: Union[str, NotGiven] = NOT_GIVEN,
document_column: str = "text",
topic_column: str = "topic",
) -> KnowledgeBase:
"""
Create a new knowledge base.
Expand All @@ -40,13 +40,13 @@ def create( # pylint: disable=too-many-arguments
name : str
The name of the knowledge base.
data : str or list[dict[str, str]]
Either a filepath (str) to a JSON or JSONL file, or a list of dicts containing "text" and "topic" (optional) keys.
Either a filepath (str) to a JSON or JSONL file, or a list of dicts containing document and topic keys.
description : str, optional
Description of the knowledge base.
document_column : str, optional
Name of the document column.
topic_column : str, optional
Name of the topic column.
document_column : str, default "text"
Column name for document content in the data.
topic_column : str, default "topic"
Column name for topic classification in the data.

Returns
-------
Expand All @@ -58,7 +58,7 @@ def create( # pylint: disable=too-many-arguments
"project_id": project_id,
"name": name,
"description": description,
"column": document_column,
"document_column": document_column,
"topic_column": topic_column,
}
)
Expand Down
35 changes: 0 additions & 35 deletions tests/test_knowledge_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,41 +329,6 @@ def test_create_with_list_of_dicts(self, mock_client):
assert result.id == "kb-list"
assert result.n_documents == 3

def test_create_with_csv_file_error(self, mock_client):
    """Check that passing a ``.csv`` file path to ``create`` raises ``ValueError``.

    A small CSV file is written to disk and its path is handed to
    ``create`` as ``data``. The call is expected to fail with the
    "Only JSON and JSONL files are supported" message, and
    ``mock_client.post`` must not have been called.
    """
    kb_resource = KnowledgeBasesResource(mock_client)

    # Header row followed by three documents, joined without a trailing newline.
    csv_rows = [
        "text,topic",
        "First document content,topic1",
        "Second document content,topic2",
        "Third document content,topic3",
    ]
    csv_content = "\n".join(csv_rows)

    # delete=False keeps the file on disk once the handle is closed so it can
    # be passed by path; it is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False
    ) as csv_handle:
        csv_handle.write(csv_content)
        csv_path = csv_handle.name

    create_kwargs = {
        "project_id": "project-id",
        "name": "CSV Knowledge Base",
        "data": csv_path,
    }

    try:
        # The unsupported extension must be reported as a ValueError.
        with pytest.raises(
            ValueError,
            match="Only JSON and JSONL files are supported for file input",
        ):
            kb_resource.create(**create_kwargs)

        # The error must surface without any POST having been issued.
        mock_client.post.assert_not_called()
    finally:
        # Remove the temporary file whether or not the assertions passed.
        Path(csv_path).unlink(missing_ok=True)

def test_create_with_single_dict_error(self, mock_client):
"""Test creating a knowledge base with a single dict (should error - needs list)."""
resource = KnowledgeBasesResource(mock_client)
Expand Down
Loading