From 80d8e9eb2919f34fcc8620d5bcf373547ca29f59 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Fri, 5 Aug 2022 12:15:07 -0400 Subject: [PATCH 1/4] Move cache folder to just huggingface --- docs/source/en/installation.mdx | 10 +++++----- src/transformers/utils/hub.py | 15 +++++++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/docs/source/en/installation.mdx b/docs/source/en/installation.mdx index f20490115842..c92ede856bf9 100644 --- a/docs/source/en/installation.mdx +++ b/docs/source/en/installation.mdx @@ -139,15 +139,15 @@ conda install -c huggingface transformers ## Cache setup -Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/transformers/`. This is the default directory given by the shell environment variable `TRANSFORMERS_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface\transformers`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: +Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/`. This is the default directory given by the shell environment variable `HUGGINGFACE_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: -1. Shell environment variable (default): `TRANSFORMERS_CACHE`. -2. Shell environment variable: `HF_HOME` + `transformers/`. -3. Shell environment variable: `XDG_CACHE_HOME` + `/huggingface/transformers`. +1. Shell environment variable (default): `HUGGINGFACE_CACHE` or `TRANSFORMERS_CACHE`. +2. Shell environment variable: `HF_HOME`. +3. Shell environment variable: `XDG_CACHE_HOME` + `/huggingface`. 
-🤗 Transformers will use the shell environment variables `PYTORCH_TRANSFORMERS_CACHE` or `PYTORCH_PRETRAINED_BERT_CACHE` if you are coming from an earlier iteration of this library and have set those environment variables, unless you specify the shell environment variable `TRANSFORMERS_CACHE`. +🤗 Transformers will use the shell environment variables `PYTORCH_TRANSFORMERS_CACHE` or `PYTORCH_PRETRAINED_BERT_CACHE` if you are coming from an earlier iteration of this library and have set those environment variables, unless you specify the shell environment variable `HUGGINGFACE_CACHE`. diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 9e81654cda7e..b78e9f8e211a 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -29,7 +29,7 @@ import warnings from contextlib import contextmanager from functools import partial -from hashlib import sha256 +from hashlib import new, sha256 from pathlib import Path from typing import BinaryIO, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse @@ -81,7 +81,8 @@ def is_offline_mode(): hf_cache_home = os.path.expanduser( os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) ) -default_cache_path = os.path.join(hf_cache_home, "transformers") +hf_cache_home = os.getenv("HUGGINFACE_CACHE", hf_cache_home) +default_cache_path = hf_cache_home # Onetime move from the old location to the new one if no ENV variable has been set. 
if ( @@ -1475,9 +1476,15 @@ def move_to_new_cache(file, repo, filename, revision, etag, commit_hash): clean_files_for(file) -def move_cache(cache_dir=None, token=None): +def move_cache(cache_dir=None, new_cache_dir=None, token=None): + if new_cache_dir is None: + new_cache_dir = TRANSFORMERS_CACHE if cache_dir is None: - cache_dir = TRANSFORMERS_CACHE + # Migrate from old cache in .cache/huggingface/transformers + if os.path.isdir(os.path.join(TRANSFORMERS_CACHE, "transformers")): + cache_dir = os.path.join(TRANSFORMERS_CACHE, "transformers") + else: + cache_dir = new_cache_dir if token is None: token = HfFolder.get_token() cached_files = get_all_cached_files(cache_dir=cache_dir) From 69679c9981e5f1b733f506f6014492b4a52e3e4b Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Fri, 5 Aug 2022 12:20:17 -0400 Subject: [PATCH 2/4] Thank you VsCode for this needless import --- src/transformers/utils/hub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index b78e9f8e211a..c75fab9beadc 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -29,7 +29,7 @@ import warnings from contextlib import contextmanager from functools import partial -from hashlib import new, sha256 +from hashlib import sha256 from pathlib import Path from typing import BinaryIO, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse From cad5dc910242ed865c44cfb580a25a03531bff11 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Fri, 5 Aug 2022 12:37:54 -0400 Subject: [PATCH 3/4] Move to hub --- docs/source/en/installation.mdx | 6 +++--- src/transformers/utils/hub.py | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/source/en/installation.mdx b/docs/source/en/installation.mdx index c92ede856bf9..d923a80d717e 100644 --- a/docs/source/en/installation.mdx +++ b/docs/source/en/installation.mdx @@ -139,15 +139,15 @@ conda install -c huggingface 
transformers ## Cache setup -Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/`. This is the default directory given by the shell environment variable `HUGGINGFACE_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: +Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hub`. This is the default directory given by the shell environment variable `TRANSFORMERS_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: -1. Shell environment variable (default): `HUGGINGFACE_CACHE` or `TRANSFORMERS_CACHE`. +1. Shell environment variable (default): `HUGGINGFACE_HUB_CACHE` or `TRANSFORMERS_CACHE`. 2. Shell environment variable: `HF_HOME`. 3. Shell environment variable: `XDG_CACHE_HOME` + `/huggingface`. -🤗 Transformers will use the shell environment variables `PYTORCH_TRANSFORMERS_CACHE` or `PYTORCH_PRETRAINED_BERT_CACHE` if you are coming from an earlier iteration of this library and have set those environment variables, unless you specify the shell environment variable `HUGGINGFACE_CACHE`. +🤗 Transformers will use the shell environment variables `PYTORCH_TRANSFORMERS_CACHE` or `PYTORCH_PRETRAINED_BERT_CACHE` if you are coming from an earlier iteration of this library and have set those environment variables, unless you specify the shell environment variable `TRANSFORMERS_CACHE`. 
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index c75fab9beadc..7fa4c0a151ac 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -81,8 +81,7 @@ def is_offline_mode(): hf_cache_home = os.path.expanduser( os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) ) -hf_cache_home = os.getenv("HUGGINFACE_CACHE", hf_cache_home) -default_cache_path = hf_cache_home +default_cache_path = os.path.join(hf_cache_home, "hub") # Onetime move from the old location to the new one if no ENV variable has been set. if ( @@ -103,7 +102,8 @@ def is_offline_mode(): PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path) PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE) -TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE) +HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", PYTORCH_TRANSFORMERS_CACHE) +TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", HUGGINGFACE_HUB_CACHE) HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) TRANSFORMERS_DYNAMIC_MODULE_NAME = "transformers_modules" SESSION_ID = uuid4().hex @@ -1480,9 +1480,10 @@ def move_cache(cache_dir=None, new_cache_dir=None, token=None): if new_cache_dir is None: new_cache_dir = TRANSFORMERS_CACHE if cache_dir is None: - # Migrate from old cache in .cache/huggingface/transformers - if os.path.isdir(os.path.join(TRANSFORMERS_CACHE, "transformers")): - cache_dir = os.path.join(TRANSFORMERS_CACHE, "transformers") + # Migrate from old cache in .cache/huggingface/transformers (sibling of the new .cache/huggingface/hub) + old_cache = Path(TRANSFORMERS_CACHE).parent / "transformers" + if os.path.isdir(str(old_cache)): + cache_dir = str(old_cache) else: cache_dir = new_cache_dir if token is None: From b9ef66a331f301bc0bc37d2325c16dc9e5885296 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Fri, 5 Aug 2022 12:38:44 -0400 Subject: [PATCH
4/4] Forgot one --- docs/source/en/installation.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/installation.mdx b/docs/source/en/installation.mdx index d923a80d717e..4ff4e04436c7 100644 --- a/docs/source/en/installation.mdx +++ b/docs/source/en/installation.mdx @@ -139,7 +139,7 @@ conda install -c huggingface transformers ## Cache setup -Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hub`. This is the default directory given by the shell environment variable `TRANSFORMERS_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: +Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hub`. This is the default directory given by the shell environment variable `TRANSFORMERS_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface\hub`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: 1. Shell environment variable (default): `HUGGINGFACE_HUB_CACHE` or `TRANSFORMERS_CACHE`. 2. Shell environment variable: `HF_HOME`.