From 699f584d1cce3d4849798b57757f99401fd5e32d Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Mon, 23 Nov 2020 12:32:42 -0500 Subject: [PATCH 1/3] Change default cache path --- src/transformers/file_utils.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index dc9998d63321..f0c0430f182a 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -203,8 +203,27 @@ _tokenizers_available = False -default_cache_path = os.path.join(torch_cache_home, "transformers") - +old_default_cache_path = os.path.join(torch_cache_home, "transformers") +# New default cache, shared with the Datasets library +hf_cache_home = os.path.expanduser( + os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) +) +default_cache_path = os.path.join(hf_cache_home, "transformers") + +if ( + os.path.isdir(old_default_cache_path) + and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ + and "PYTORCH_TRANSFORMERS_CACHE" not in os.environ + and "TRANSFORMERS_CACHE" not in os.environ +): + logger.warn( + "In Transformers v4.0.0, the default path to cache downloaded models changed from " + "'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. Since you don't seem to have overridden " + "and '~/.cache/torch/transformers' is a directory that exists, we're moving it to " + "'~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should " + "only see this message once." 
+ ) + shutil.move(old_default_cache_path, default_cache_path) PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path) PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE) From 31c28eb7f12f9bafeb13707814e984af5ae374ed Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Mon, 23 Nov 2020 13:40:24 -0500 Subject: [PATCH 2/3] Document changes --- docs/source/installation.md | 12 ++++++------ src/transformers/file_utils.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/source/installation.md b/docs/source/installation.md index 8e5a37af4b8d..2fe0c40aaf6c 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -70,15 +70,15 @@ to check 🤗 Transformers is properly installed. This library provides pretrained models that will be downloaded and cached locally. Unless you specify a location with `cache_dir=...` when you use methods like `from_pretrained`, these models will automatically be downloaded in the -folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the PyTorch -cache home followed by ``/transformers/`` (even if you don't have PyTorch installed). This is (by order of priority): +folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the Hugging +Face cache home followed by ``/transformers/``. This is (by order of priority): - * shell environment variable ``TORCH_HOME`` - * shell environment variable ``XDG_CACHE_HOME`` + ``/torch/`` - * default: ``~/.cache/torch/`` + * shell environment variable ``HF_HOME`` + * shell environment variable ``XDG_CACHE_HOME`` + ``/transformers/`` + * default: ``~/.cache/transformers/`` So if you don't have any specific environment variable set, the cache directory will be at -``~/.cache/torch/transformers/``. +``~/.cache/huggingface/transformers/``. 
**Note:** If you have set a shell environment variable for one of the predecessors of this library (``PYTORCH_TRANSFORMERS_CACHE`` or ``PYTORCH_PRETRAINED_BERT_CACHE``), those will be used if there is no shell diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index f0c0430f182a..ae05e11b38fb 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -210,6 +210,7 @@ ) default_cache_path = os.path.join(hf_cache_home, "transformers") +# One-time move from the old location to the new one if no ENV variable has been set. if ( os.path.isdir(old_default_cache_path) and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ From 9b84c0ed6eced80c2825447a19f1b113908f46a4 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Mon, 23 Nov 2020 13:44:57 -0500 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Lysandre Debut --- docs/source/installation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/installation.md b/docs/source/installation.md index 2fe0c40aaf6c..4e808c290bad 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -74,8 +74,8 @@ folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The defau Face cache home followed by ``/transformers/``. This is (by order of priority): * shell environment variable ``HF_HOME`` - * shell environment variable ``XDG_CACHE_HOME`` + ``/transformers/`` - * default: ``~/.cache/transformers/`` + * shell environment variable ``XDG_CACHE_HOME`` + ``/huggingface/`` + * default: ``~/.cache/huggingface/`` So if you don't have any specific environment variable set, the cache directory will be at ``~/.cache/huggingface/transformers/``.