Skip to content

Commit 73c46c0

Browse files
committed
Added test
1 parent 8d6b859 commit 73c46c0

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

tests/test_datamodels.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,30 @@ def test_selected_with_duplicates_unhashable_values() -> None:
193193

194194
pairs = d.selected_with_duplicates
195195
assert pairs == [(selected, [(filtered, 1.0)])]
196+
197+
198+
def test_selected_with_duplicates_removes_internal_duplicates() -> None:
    """Check that a filtered row reported twice for one selected row is listed only once.

    The same filtered row is registered in two ``DuplicateRecord`` entries, both
    pointing at the same selected row but with different scores. The resulting
    ``selected_with_duplicates`` must hold a single entry for the selected row,
    and that entry's duplicate list must contain the filtered row exactly once.
    """
    kept_row = {"id": 0, "text": "hello"}
    dropped_row = {"id": 1, "text": "hello"}

    # Two records for the very same filtered row, differing only in score.
    repeated_records = [
        DuplicateRecord(dropped_row, exact=False, duplicates=[(kept_row, score)])
        for score in (0.95, 0.90)
    ]

    result = DeduplicationResult(
        selected=[kept_row],
        filtered=repeated_records,
        threshold=0.8,
        columns=["text"],
    )

    pairs = result.selected_with_duplicates

    # Exactly one selected row went in, so exactly one pair must come out.
    assert len(pairs) == 1

    kept_out, duplicates_out = pairs[0]

    # The selected row itself is passed through untouched.
    assert kept_out == kept_row

    # The repeated filtered row collapses to one entry; this test does not
    # pin which of the two scores is retained.
    assert len(duplicates_out) == 1
    first_duplicate_row = duplicates_out[0][0]
    assert first_duplicate_row == dropped_row

0 commit comments

Comments
 (0)