diff --git a/src/datasets/builder.py b/src/datasets/builder.py index 410bbf3dd0d..767d44e25d4 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -253,6 +253,11 @@ def __init__( # Prepare config: DatasetConfig contains name, version and description but can be extended by each dataset if "features" in inspect.signature(self.BUILDER_CONFIG_CLASS.__init__).parameters and features is not None: config_kwargs["features"] = features + # Discard default config parameters + if "data_files" in config_kwargs and config_kwargs["data_files"] is None: + del config_kwargs["data_files"] + if "data_dir" in config_kwargs and config_kwargs["data_dir"] is None: + del config_kwargs["data_dir"] self.config, self.config_id = self._create_builder_config( name, custom_features=features, diff --git a/tests/test_builder.py b/tests/test_builder.py index 3a9d665e709..71b2c07b35a 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -643,6 +643,11 @@ def test_cache_dir_for_config_kwargs(self): def test_config_names(self): with tempfile.TemporaryDirectory() as tmp_dir: + + with self.assertRaises(ValueError) as error_context: + DummyBuilderWithMultipleConfigs(cache_dir=tmp_dir, data_files=None, data_dir=None) + self.assertIn("Please pick one among the available configs", str(error_context.exception)) + dummy_builder = DummyBuilderWithMultipleConfigs(cache_dir=tmp_dir, name="a") self.assertEqual(dummy_builder.config.name, "a")