From bb392f8004ef0cdb436169819d989529f2fd1c84 Mon Sep 17 00:00:00 2001
From: vandit98
Date: Wed, 26 Nov 2025 19:27:03 +0530
Subject: [PATCH] Fix spurious label column when directories match split names

When every inferred label is just a split name (e.g. the data lives in
"train/" and "test/" folders), the folders describe the split layout and
not classes, so no label column should be added.

The heuristic is only applied when `drop_labels` is left unset (None).
An explicit `drop_labels=False` is a direct request for labels and is
still honored.

---
 .../folder_based_builder/folder_based_builder.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py
index 182de467b14..5f000fcd986 100644
--- a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py
+++ b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py
@@ -147,6 +147,15 @@ def analyze(files_or_archives, downloaded_files_or_dirs, split):
             if self.config.drop_labels is None
             else not self.config.drop_labels
         )
+
+        # Directories named after splits describe the data layout, not classes.
+        # Only second-guess the inference when `drop_labels` was left unset, so an
+        # explicit `drop_labels=False` from the user is never overridden.
+        if self.config.drop_labels is None and add_labels and labels:
+            common_split_names = {"train", "training", "test", "testing", "val", "valid", "validation", "dev", "eval"}
+            split_names = set(data_files)
+            if labels.issubset(common_split_names | split_names):
+                add_labels = False
 
         if add_labels:
             logger.info("Adding the labels inferred from data directories to the dataset's features...")