Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyrit/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .fetch_example_datasets import (
fetch_adv_bench_dataset,
fetch_aya_redteaming_dataset,
fetch_babelscape_alert_dataset,
fetch_decoding_trust_stereotypes_dataset,
fetch_examples,
fetch_forbidden_questions_dataset,
Expand Down Expand Up @@ -33,4 +34,5 @@
"fetch_llm_latent_adversarial_training_harmful_dataset",
"fetch_tdc23_redteaming_dataset",
"fetch_librAI_do_not_answer_dataset",
"fetch_babelscape_alert_dataset",
]
42 changes: 42 additions & 0 deletions pyrit/datasets/fetch_example_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,48 @@ def fetch_aya_redteaming_dataset(
source="https://huggingface.co/datasets/CohereForAI/aya_redteaming",
)
)

seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)
return seed_prompt_dataset

def fetch_babelscape_alert_dataset(category: Optional[str] = None) -> SeedPromptDataset:
    """
    Fetch the Babelscape/ALERT dataset and create a SeedPromptDataset.

    Args:
        category (Optional[str]): The dataset category to load, either "alert" or
            "alert_adversarial". If not specified (None or empty), both categories
            are loaded and their prompts are combined, "alert_adversarial" first.

    Returns:
        SeedPromptDataset: A SeedPromptDataset containing the prompts from the "test"
            split of every requested category.

    Raises:
        ValueError: If category is specified but is neither "alert" nor "alert_adversarial".
    """
    valid_categories = ["alert_adversarial", "alert"]

    if not category:  # if category is not specified, read both subsets
        data_categories = valid_categories
    elif category not in valid_categories:
        raise ValueError(f"Invalid Parameter: {category}. Expected 'alert_adversarial' or 'alert'")
    else:
        data_categories = [category]

    # Load the specified subset or both categories. Prompts are accumulated across
    # subsets: rebinding the list on each iteration would keep only the last subset.
    prompts: list = []
    for name in data_categories:
        data = load_dataset("Babelscape/ALERT", name)
        prompts.extend(item["prompt"] for item in data["test"])

    # Create SeedPrompt instances from each example in 'prompts'
    seed_prompts = [
        SeedPrompt(
            value=prompt,
            data_type="text",
            name="Babelscape/ALERT",
            dataset_name="Babelscape/ALERT",
            description="""ALERT by Babelscape is a dataset that consists
of two different categories, 'alert' with 15k red teaming prompts,
and 'alert_adversarial' with 30k adversarial red teaming prompts.""",
            source="https://huggingface.co/datasets/Babelscape/ALERT",
        )
        for prompt in prompts
    ]

    seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)
    return seed_prompt_dataset