diff --git a/docs/source/installation.md b/docs/source/installation.md index 8e5a37af4b8d..4e808c290bad 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -70,15 +70,15 @@ to check 🤗 Transformers is properly installed. This library provides pretrained models that will be downloaded and cached locally. Unless you specify a location with `cache_dir=...` when you use methods like `from_pretrained`, these models will automatically be downloaded in the -folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the PyTorch -cache home followed by ``/transformers/`` (even if you don't have PyTorch installed). This is (by order of priority): +folder given by the shell environment variable ``TRANSFORMERS_CACHE``. The default value for it will be the Hugging +Face cache home followed by ``/transformers/``. This is (by order of priority): - * shell environment variable ``TORCH_HOME`` - * shell environment variable ``XDG_CACHE_HOME`` + ``/torch/`` - * default: ``~/.cache/torch/`` + * shell environment variable ``HF_HOME`` + * shell environment variable ``XDG_CACHE_HOME`` + ``/huggingface/`` + * default: ``~/.cache/huggingface/`` So if you don't have any specific environment variable set, the cache directory will be at -``~/.cache/torch/transformers/``. +``~/.cache/huggingface/transformers/``. 
**Note:** If you have set a shell environment variable for one of the predecessors of this library (``PYTORCH_TRANSFORMERS_CACHE`` or ``PYTORCH_PRETRAINED_BERT_CACHE``), those will be used if there is no shell
diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py
index dc9998d63321..ae05e11b38fb 100644
--- a/src/transformers/file_utils.py
+++ b/src/transformers/file_utils.py
@@ -203,8 +203,31 @@
     _tokenizers_available = False
 
 
-default_cache_path = os.path.join(torch_cache_home, "transformers")
-
+old_default_cache_path = os.path.join(torch_cache_home, "transformers")
+# New default cache, shared with the Datasets library
+hf_cache_home = os.path.expanduser(
+    os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
+)
+default_cache_path = os.path.join(hf_cache_home, "transformers")
+
+# Onetime move from the old location to the new one if no ENV variable has been set.
+# The move is skipped when the new location already exists: shutil.move() into an existing
+# directory would nest the old cache inside it instead of replacing it.
+if (
+    os.path.isdir(old_default_cache_path)
+    and not os.path.exists(default_cache_path)
+    and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ
+    and "PYTORCH_TRANSFORMERS_CACHE" not in os.environ
+    and "TRANSFORMERS_CACHE" not in os.environ
+):
+    logger.warning(
+        "In Transformers v4.0.0, the default path to cache downloaded models changed from "
+        "'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. Since you don't seem to have overridden "
+        "and '~/.cache/torch/transformers' is a directory that exists, we're moving it to "
+        "'~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should "
+        "only see this message once."
+    )
+    shutil.move(old_default_cache_path, default_cache_path)
 
 PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path)
 PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)