From f18a57e6dcf8d2e9125b2104f4fd85b60641b77e Mon Sep 17 00:00:00 2001
From: Quentin Lhoest
Date: Tue, 20 Jul 2021 17:55:48 +0200
Subject: [PATCH] don't use data_dir if it's None for config_id

---
 src/datasets/builder.py | 2 ++
 tests/test_builder.py   | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/src/datasets/builder.py b/src/datasets/builder.py
index 0df966dd4dc..410bbf3dd0d 100644
--- a/src/datasets/builder.py
+++ b/src/datasets/builder.py
@@ -123,6 +123,8 @@ def create_config_id(
         # it was previously ignored before the introduction of config id because we didn't want
         # to change the config name. Now it's fine to take it into account for the config id.
         # config_kwargs_to_add_to_suffix.pop("data_dir", None)
+        if "data_dir" in config_kwargs_to_add_to_suffix and config_kwargs_to_add_to_suffix["data_dir"] is None:
+            del config_kwargs_to_add_to_suffix["data_dir"]
         if config_kwargs_to_add_to_suffix:
             # we don't care about the order of the kwargs
             config_kwargs_to_add_to_suffix = {
diff --git a/tests/test_builder.py b/tests/test_builder.py
index 27bbac945c0..3a9d665e709 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import types
+from pathlib import Path
 from unittest import TestCase
 
 import numpy as np
@@ -518,6 +519,12 @@ def test_generator_based_download_and_prepare(self):
                 )
             )
 
+    def test_cache_dir_no_args(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            dummy_builder = DummyGeneratorBasedBuilder(cache_dir=tmp_dir, name="dummy", data_dir=None, data_files=None)
+            relative_cache_dir_parts = Path(dummy_builder._relative_data_dir()).parts
+            self.assertEqual(relative_cache_dir_parts, ("dummy_generator_based_builder", "dummy", "0.0.0"))
+
     def test_cache_dir_for_data_files(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             dummy_data1 = os.path.join(tmp_dir, "dummy_data1.txt")