From d4e52de825d4c088d776fa18b04f38f260020915 Mon Sep 17 00:00:00 2001 From: Benno Weck <13520622+Bomme@users.noreply.github.com> Date: Thu, 11 Sep 2025 21:35:00 +0200 Subject: [PATCH 1/3] include dill==0.3.9 in requirements --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9070f663b4c..04482c4cd41 100644 --- a/setup.py +++ b/setup.py @@ -113,7 +113,7 @@ # Minimum 21.0.0 to support `use_content_defined_chunking` in ParquetWriter "pyarrow>=21.0.0", # For smart caching dataset processing - "dill>=0.3.0,<0.3.9", # tmp pin until dill has official support for determinism see https://github.com/uqfoundation/dill/issues/19 + "dill>=0.3.0,<0.3.10", # tmp pin until dill has official support for determinism see https://github.com/uqfoundation/dill/issues/19 # For performance gains with apache arrow "pandas", # for downloading datasets over HTTPS From c84eb10e4c128fc7c62431abb5301867a502f06c Mon Sep 17 00:00:00 2001 From: Benno Weck <13520622+Bomme@users.noreply.github.com> Date: Thu, 11 Sep 2025 21:37:42 +0200 Subject: [PATCH 2/3] Extract duplicate DILL_VERSION checks to private function and add 0.3.9 support (#2) * Initial plan * Extract DILL_VERSION check to private function and add 0.3.9 support Co-authored-by: Bomme <13520622+Bomme@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- src/datasets/utils/_dill.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/datasets/utils/_dill.py b/src/datasets/utils/_dill.py index 0876a00d9db..92edc28638f 100644 --- a/src/datasets/utils/_dill.py +++ b/src/datasets/utils/_dill.py @@ -98,6 +98,16 @@ def proxy(func): return proxy +def _is_supported_dill_version(): + """Check if the current dill version is in the supported range.""" + return config.DILL_VERSION.release[:3] in [ + version.parse("0.3.6").release, + version.parse("0.3.7").release, + version.parse("0.3.8").release, + version.parse("0.3.9").release, + ] + + def dump(obj, file): """Pickle an object to a file.""" Pickler(file, recurse=True).dump(obj) @@ -115,11 +125,7 @@ def dumps(obj): def log(pickler, msg): dill._dill.log.info(msg) -elif config.DILL_VERSION.release[:3] in [ - version.parse("0.3.6").release, - version.parse("0.3.7").release, - version.parse("0.3.8").release, -]: +elif _is_supported_dill_version(): def log(pickler, msg): dill._dill.logger.trace(pickler, msg) @@ -312,11 +318,7 @@ def _save_code(pickler, obj): dill._dill.log.info("# Co") return -elif config.DILL_VERSION.release[:3] in [ - version.parse("0.3.6").release, - version.parse("0.3.7").release, - version.parse("0.3.8").release, -]: +elif _is_supported_dill_version(): # From: https://github.com/uqfoundation/dill/blob/dill-0.3.6/dill/_dill.py#L1104 @pklregister(CodeType) def save_code(pickler, obj): From 2a8008dc84ee63ceb78e8a6c400660a617899567 Mon Sep 17 00:00:00 2001 From: Benno Weck <13520622+Bomme@users.noreply.github.com> Date: Fri, 12 Sep 2025 22:53:09 +0200 Subject: [PATCH 3/3] bump dill to 0.4.0 --- setup.py | 2 +- src/datasets/utils/_dill.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 43abad7c3bc..c58c5d271df 100644 --- a/setup.py +++ b/setup.py @@ -113,7 +113,7 @@ # Minimum 21.0.0 to support `use_content_defined_chunking` in ParquetWriter "pyarrow>=21.0.0", # For smart caching dataset processing - "dill>=0.3.0,<0.3.10", # tmp pin until dill has official support for determinism see https://github.com/uqfoundation/dill/issues/19 + "dill>=0.3.0,<0.4.1", # tmp pin until dill has official support for determinism see https://github.com/uqfoundation/dill/issues/19 # For performance gains with apache arrow "pandas", # for downloading datasets over HTTPS diff --git a/src/datasets/utils/_dill.py b/src/datasets/utils/_dill.py index 92edc28638f..fad95f7edf5 100644 --- a/src/datasets/utils/_dill.py +++ b/src/datasets/utils/_dill.py @@ -105,6 +105,7 @@ def _is_supported_dill_version(): version.parse("0.3.7").release, version.parse("0.3.8").release, version.parse("0.3.9").release, + version.parse("0.4.0").release, ]