Skip to content

Commit 81bf441

Browse files
committed
detector: add startswith matching for StringDetector base class (#1075)
2 parents 57dd0d8 + 31db52d commit 81bf441

2 files changed

Lines changed: 48 additions & 23 deletions

File tree

garak/detectors/base.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,9 @@ def detect(
191191
elif self.matchtype == "word":
192192
if re.search(r"\b" + s + r"\b", output):
193193
match = True
194+
elif self.matchtype == "startswith":
195+
if output.startswith(s):
196+
match = True
194197
else:
195198
raise ValueError(
196199
f"Don't know how to process matchtype: {self.matchtype}"

tests/detectors/test_detectors_base.py

Lines changed: 45 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -110,32 +110,39 @@ def test_none_outputs(string_detector):
110110
assert results == [1.0], "Failed to handle None outputs correctly"
111111

112112

113-
@pytest.mark.parametrize("matchtype", ["str", "word"])
113+
MATCHTYPE_AND_CASING_CASES = {
114+
"str": [
115+
(f"begin {TEST_STRINGS[0]}ing", True), # should match
116+
(
117+
f"begin {TEST_STRINGS[0].upper()}ING",
118+
False,
119+
), # shouldn't match case sensitive
120+
],
121+
"word": [
122+
(f"{TEST_STRINGS[0]} word", True), # should match
123+
(
124+
f"{TEST_STRINGS[0].upper()} word",
125+
False,
126+
), # shouldn't match case sensitive
127+
],
128+
"startswith": [
129+
(f"{TEST_STRINGS[0]} start", True), # should match
130+
(
131+
f"{TEST_STRINGS[0].upper()} start",
132+
False,
133+
), # shouldn't match case sensitive
134+
],
135+
}
136+
137+
138+
@pytest.mark.parametrize("matchtype", MATCHTYPE_AND_CASING_CASES.keys())
114139
def test_matchtype_with_case_sensitivity(matchtype):
115140
"""Test case sensitivity with different matchtypes"""
116141

117-
lowercase_test_word = "test"
118-
detector = garak.detectors.base.StringDetector([lowercase_test_word])
142+
detector = garak.detectors.base.StringDetector([TEST_STRINGS[0]])
119143
detector.matchtype = matchtype
120144

121-
test_cases = {
122-
"str": [
123-
(f"begin {lowercase_test_word}ing", True), # should match
124-
(
125-
f"begin {lowercase_test_word.upper()}ING",
126-
False,
127-
), # shouldn't match case sensitive
128-
],
129-
"word": [
130-
(f"{lowercase_test_word} word", True), # should match
131-
(
132-
f"{lowercase_test_word.upper()} word",
133-
False,
134-
), # shouldn't match case sensitive
135-
],
136-
}
137-
138-
for text, should_match_sensitive in test_cases[matchtype]:
145+
for text, should_match_sensitive in MATCHTYPE_AND_CASING_CASES[matchtype]:
139146
attempt = Attempt(prompt="Hello")
140147
attempt.outputs = [text]
141148

@@ -152,7 +159,7 @@ def test_matchtype_with_case_sensitivity(matchtype):
152159
], f"Case insensitive {matchtype} match failed for '{text}'"
153160

154161

155-
def test_word_boundary_matching():
162+
def test_stringdetector_word_boundary_matching():
156163
detector = garak.detectors.base.StringDetector(TEST_STRINGS)
157164
detector.matchtype = "word"
158165

@@ -173,4 +180,19 @@ def test_word_boundary_matching():
173180
], f"Word boundary match failed for '{text}', expected {match}"
174181

175182

176-
TEST_STRINGS
183+
def test_stringdetector_startswith():
184+
detector = garak.detectors.base.StringDetector(TEST_STRINGS)
185+
detector.matchtype = "startswith"
186+
187+
test_cases = [
188+
("this is a test", False),
189+
("tests always succeed", True),
190+
]
191+
192+
for text, match in test_cases:
193+
attempt = Attempt(prompt="")
194+
attempt.outputs = [text]
195+
results = detector.detect(attempt, case_sensitive=True)
196+
assert results == [
197+
1.0 if match else 0.0
198+
], f"startswith match failed for '{text}', expected {match}"

0 commit comments

Comments
 (0)