Merged
src/datasets/arrow_dataset.py (1 addition, 1 deletion)

@@ -1320,7 +1320,7 @@ def set_format(
         self._format_kwargs = format_kwargs
         self._format_columns = columns
         self._output_all_columns = output_all_columns
-        logger.info(
+        logger.debug(
             "Set __getitem__(key) output type to %s for %s columns "
             " (when key is int or slice) and %s output other (un-formatted) columns.",
             "python objects" if type is None else type,
src/datasets/arrow_writer.py (1 addition, 1 deletion)

@@ -423,7 +423,7 @@ def finalize(self, close_stream=True):
             self.pa_writer.close()
         if close_stream:
             self.stream.close()
-        logger.info(
+        logger.debug(
             "Done writing %s %s in %s bytes %s.",
             self._num_examples,
             self.unit,
src/datasets/builder.py (2 additions, 2 deletions)

@@ -507,7 +507,7 @@ def download_and_prepare(
         with FileLock(lock_path):
             data_exists = os.path.exists(self._cache_dir)
             if data_exists and download_mode == GenerateMode.REUSE_DATASET_IF_EXISTS:
-                logger.warning("Reusing dataset %s (%s)", self.name, self._cache_dir)
+                logger.info("Reusing dataset %s (%s)", self.name, self._cache_dir)
                 # We need to update the info in case some splits were added in the meantime
                 # for example when calling load_dataset from multiple workers.
                 self.info = self._load_info()
@@ -729,7 +729,7 @@ def as_dataset(
                 % (self.name, self._cache_dir_root)
             )

-        logger.info(
+        logger.debug(
             "Constructing Dataset for split %s, from %s", split or ", ".join(self.info.splits), self._cache_dir
         )
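Note: with this change, "Reusing dataset" drops from WARNING to INFO, so it no longer shows under the library's default verbosity. A minimal sketch of one way to surface it again, using only the standard logging module rather than any datasets-specific helper:

import logging

# Lower the "datasets" logger threshold so INFO records such as
# "Reusing dataset ..." are emitted again (assumes the default handler setup).
logging.basicConfig(level=logging.INFO)
logging.getLogger("datasets").setLevel(logging.INFO)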
src/datasets/load.py (1 addition, 1 deletion)

@@ -171,7 +171,7 @@ def get_imports(file_path: str):
     with open(file_path, mode="r", encoding="utf-8") as f:
         lines.extend(f.readlines())

-    logger.info("Checking %s for additional imports.", file_path)
+    logger.debug("Checking %s for additional imports.", file_path)
     imports: List[Tuple[str, str, str, Optional[str]]] = []
     is_in_docstring = False
     for line in lines:
src/datasets/utils/filelock.py (2 additions, 2 deletions)

@@ -272,7 +272,7 @@ def acquire(self, timeout=None, poll_intervall=0.05):
                         self._acquire()

                 if self.is_locked:
-                    logger().info("Lock %s acquired on %s", lock_id, lock_filename)
+                    logger().debug("Lock %s acquired on %s", lock_id, lock_filename)
                     break
                 elif timeout >= 0 and time.time() - start_time > timeout:
                     logger().debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
@@ -315,7 +315,7 @@ def release(self, force=False):
                    logger().debug("Attempting to release lock %s on %s", lock_id, lock_filename)
                    self._release()
                    self._lock_counter = 0
-                    logger().info("Lock %s released on %s", lock_id, lock_filename)
+                    logger().debug("Lock %s released on %s", lock_id, lock_filename)

         return None
tests/test_load.py (2 additions, 0 deletions)

@@ -1,4 +1,5 @@
 import importlib
+import logging
 import os
 import shutil
 import tempfile
@@ -271,6 +272,7 @@ def test_load_dataset_then_move_then_reload(dataset_loading_script_dir, data_dir
     del dataset
     os.rename(cache_dir1, cache_dir2)
     caplog.clear()
+    caplog.set_level(logging.INFO, logger="datasets")
     dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir, split="train", cache_dir=cache_dir2)
     assert "Reusing dataset" in caplog.text
     assert dataset._fingerprint == fingerprint1, "for the caching mechanism to work, fingerprint should stay the same"
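The added caplog.set_level call is needed because "Reusing dataset" is now logged at INFO rather than WARNING, so the test must lower the "datasets" logger threshold before pytest's caplog fixture can capture the record. A minimal sketch of the pattern in isolation (the test name and message below are hypothetical):

import logging

def test_info_record_is_captured(caplog):
    # Temporarily allow INFO records from the "datasets" logger for this test.
    caplog.set_level(logging.INFO, logger="datasets")
    logging.getLogger("datasets").info("Reusing dataset dummy (/tmp/cache)")
    assert "Reusing dataset" in caplog.text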