From 46a813b64423bdbe5181c961e76def29f51fb600 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 24 Jun 2021 08:39:19 +0200 Subject: [PATCH 1/7] Switch some log info to debug --- src/datasets/arrow_dataset.py | 2 +- src/datasets/arrow_writer.py | 2 +- src/datasets/builder.py | 2 +- src/datasets/load.py | 2 +- src/datasets/utils/filelock.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 24d8aadfb60..5c34c349a3e 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1320,7 +1320,7 @@ def set_format( self._format_kwargs = format_kwargs self._format_columns = columns self._output_all_columns = output_all_columns - logger.info( + logger.debug( "Set __getitem__(key) output type to %s for %s columns " " (when key is int or slice) and %s output other (un-formatted) columns.", "python objects" if type is None else type, diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py index 713d2bbf7b3..a3f4fc7cfc7 100644 --- a/src/datasets/arrow_writer.py +++ b/src/datasets/arrow_writer.py @@ -423,7 +423,7 @@ def finalize(self, close_stream=True): self.pa_writer.close() if close_stream: self.stream.close() - logger.info( + logger.debug( "Done writing %s %s in %s bytes %s.", self._num_examples, self.unit, diff --git a/src/datasets/builder.py b/src/datasets/builder.py index 0e3a63acc48..2f7a762ad1d 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -729,7 +729,7 @@ def as_dataset( % (self.name, self._cache_dir_root) ) - logger.info( + logger.debug( "Constructing Dataset for split %s, from %s", split or ", ".join(self.info.splits), self._cache_dir ) diff --git a/src/datasets/load.py b/src/datasets/load.py index dc658d042d0..0302380ce6f 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -171,7 +171,7 @@ def get_imports(file_path: str): with open(file_path, mode="r", encoding="utf-8") as f: lines.extend(f.readlines()) - logger.info("Checking %s for additional imports.", file_path) + logger.debug("Checking %s for additional imports.", file_path) imports: List[Tuple[str, str, str, Optional[str]]] = [] is_in_docstring = False for line in lines: diff --git a/src/datasets/utils/filelock.py b/src/datasets/utils/filelock.py index b67ffbf90ec..3e5bacafe27 100644 --- a/src/datasets/utils/filelock.py +++ b/src/datasets/utils/filelock.py @@ -272,7 +272,7 @@ def acquire(self, timeout=None, poll_intervall=0.05): self._acquire() if self.is_locked: - logger().info("Lock %s acquired on %s", lock_id, lock_filename) + logger().debug("Lock %s acquired on %s", lock_id, lock_filename) break elif timeout >= 0 and time.time() - start_time > timeout: logger().debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) @@ -315,7 +315,7 @@ def release(self, force=False): logger().debug("Attempting to release lock %s on %s", lock_id, lock_filename) self._release() self._lock_counter = 0 - logger().info("Lock %s released on %s", lock_id, lock_filename) + logger().debug("Lock %s released on %s", lock_id, lock_filename) return None From e692e32ffb7a85ef930c9f038c7589d0a8d2661f Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 24 Jun 2021 08:40:18 +0200 Subject: [PATCH 2/7] Switch some log warning to info --- src/datasets/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/builder.py b/src/datasets/builder.py index 2f7a762ad1d..36ada8ce3dc 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -507,7 +507,7 @@ def download_and_prepare( with FileLock(lock_path): data_exists = os.path.exists(self._cache_dir) if data_exists and download_mode == GenerateMode.REUSE_DATASET_IF_EXISTS: - logger.warning("Reusing dataset %s (%s)", self.name, self._cache_dir) + logger.info("Reusing dataset %s (%s)", self.name, self._cache_dir) # We need to update the info in case some splits were added in the meantime # for example when calling load_dataset from multiple workers. self.info = self._load_info() From cbcfbc0a8a6bd3cdfe5c61bd006530691d8d3c16 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 24 Jun 2021 09:25:10 +0200 Subject: [PATCH 3/7] Fix test --- tests/test_load.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_load.py b/tests/test_load.py index 74c9cb53549..833cd636c75 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -1,4 +1,5 @@ import importlib +import logging import os import shutil import tempfile @@ -271,6 +272,7 @@ def test_load_dataset_then_move_then_reload(dataset_loading_script_dir, data_dir del dataset os.rename(cache_dir1, cache_dir2) caplog.clear() + caplog.set_level(logging.INFO) dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir, split="train", cache_dir=cache_dir2) assert "Reusing dataset" in caplog.text assert dataset._fingerprint == fingerprint1, "for the caching mechanism to work, fingerprint should stay the same" From 55c87adefe17daae66bb32a63f020d95bd10c047 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 24 Jun 2021 10:55:09 +0200 Subject: [PATCH 4/7] Fix test --- tests/test_load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_load.py b/tests/test_load.py index 833cd636c75..0c7a9a6a72a 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -272,7 +272,7 @@ def test_load_dataset_then_move_then_reload(dataset_loading_script_dir, data_dir del dataset os.rename(cache_dir1, cache_dir2) caplog.clear() - caplog.set_level(logging.INFO) + caplog.set_level(logging.INFO, logger="datasets") dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir, split="train", cache_dir=cache_dir2) assert "Reusing dataset" in caplog.text assert dataset._fingerprint == fingerprint1, "for the caching mechanism to work, fingerprint should stay the same" From ffa833704f94265789997b41c1f8f4bd144a516d Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Jun 2021 10:26:16 +0200 Subject: [PATCH 5/7] Revert "Switch some log warning to info" This reverts commit e692e32ffb7a85ef930c9f038c7589d0a8d2661f. --- src/datasets/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/builder.py b/src/datasets/builder.py index 36ada8ce3dc..2f7a762ad1d 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -507,7 +507,7 @@ def download_and_prepare( with FileLock(lock_path): data_exists = os.path.exists(self._cache_dir) if data_exists and download_mode == GenerateMode.REUSE_DATASET_IF_EXISTS: - logger.info("Reusing dataset %s (%s)", self.name, self._cache_dir) + logger.warning("Reusing dataset %s (%s)", self.name, self._cache_dir) # We need to update the info in case some splits were added in the meantime # for example when calling load_dataset from multiple workers. self.info = self._load_info() From afcd087524524218127f971ba7a4889dc8b368fa Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Jun 2021 10:33:17 +0200 Subject: [PATCH 6/7] Revert unnecessary test fix --- tests/test_load.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_load.py b/tests/test_load.py index 0c7a9a6a72a..8963b3bebe7 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -272,7 +272,6 @@ def test_load_dataset_then_move_then_reload(dataset_loading_script_dir, data_dir del dataset os.rename(cache_dir1, cache_dir2) caplog.clear() - caplog.set_level(logging.INFO, logger="datasets") dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir, split="train", cache_dir=cache_dir2) assert "Reusing dataset" in caplog.text assert dataset._fingerprint == fingerprint1, "for the caching mechanism to work, fingerprint should stay the same" From f47a45abce6b053daece94fec733fc57806ee808 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Jun 2021 10:37:34 +0200 Subject: [PATCH 7/7] Fix style --- tests/test_load.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_load.py b/tests/test_load.py index 8963b3bebe7..74c9cb53549 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -1,5 +1,4 @@ import importlib -import logging import os import shutil import tempfile