From abb1275b0bbd2ba597055ccf7176578a9a0619c6 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Fri, 12 Sep 2025 17:17:06 +0200 Subject: [PATCH 1/6] HTTP configuration docs --- docs/source/en/package_reference/utilities.md | 39 +++++++++++++------ docs/source/ko/package_reference/utilities.md | 10 ----- src/huggingface_hub/__init__.py | 6 +-- src/huggingface_hub/utils/__init__.py | 2 +- src/huggingface_hub/utils/_http.py | 10 ++--- 5 files changed, 37 insertions(+), 30 deletions(-) diff --git a/docs/source/en/package_reference/utilities.md b/docs/source/en/package_reference/utilities.md index a7cc46315d..757a6613b2 100644 --- a/docs/source/en/package_reference/utilities.md +++ b/docs/source/en/package_reference/utilities.md @@ -120,23 +120,40 @@ You can also enable or disable progress bars for specific groups. This allows yo [[autodoc]] huggingface_hub.utils.enable_progress_bars -## Configure HTTP backend +## Configuring the HTTP Backend -In some environments, you might want to configure how HTTP calls are made, for example if you are using a proxy. -`huggingface_hub` let you configure this globally using [`configure_http_backend`]. All requests made to the Hub will -then use your settings. Under the hood, `huggingface_hub` uses `requests.Session` so you might want to refer to the -[`requests` documentation](https://requests.readthedocs.io/en/latest/user/advanced) to learn more about the available -parameters. + -Since `requests.Session` is not guaranteed to be thread-safe, `huggingface_hub` creates one session instance per thread. -Using sessions allows us to keep the connection open between HTTP calls and ultimately save time. If you are -integrating `huggingface_hub` in a third-party library and wants to make a custom call to the Hub, use [`get_session`] -to get a Session configured by your users (i.e. replace any `requests.get(...)` call by `get_session().get(...)`). 
+In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via `configure_http_backend`. Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default. -[[autodoc]] configure_http_backend + + + +In some setups, you may need to control how HTTP requests are made, for example when working behind a proxy. The `huggingface_hub` library allows you to configure this globally with [`set_client_factory`]. After configuration, all requests to the Hub will use your custom settings. Since `huggingface_hub` relies on `httpx.Client` under the hood, you can check the [`httpx` documentation](https://www.python-httpx.org/advanced/clients/) for details on available parameters. + +If you are building a third-party library and need to make direct requests to the Hub, use [`get_session`] to obtain a correctly configured `httpx` client. Replace any direct `httpx.get(...)` calls with `get_session().get(...)` to ensure proper behavior. + +[[autodoc]] set_client_factory [[autodoc]] get_session +In rare cases, you may want to manually close the current session (for example, after a transient `SSLError`). You can do this with [`close_session`]. A new session will automatically be created on the next call to [`get_session`]. + +Sessions are always closed automatically when the process exits. + +[[autodoc]] close_session + +For async code, use [`set_async_client_factory`] to configure an `httpx.AsyncClient` and [`get_async_session`] to retrieve one. + +[[autodoc]] set_async_client_factory + +[[autodoc]] get_async_session + + + +Unlike the synchronous client, the lifecycle of the async client is not managed automatically. Use an async context manager to handle it properly. 
+ + ## Handle HTTP errors diff --git a/docs/source/ko/package_reference/utilities.md b/docs/source/ko/package_reference/utilities.md index 5743d12015..4390a90718 100644 --- a/docs/source/ko/package_reference/utilities.md +++ b/docs/source/ko/package_reference/utilities.md @@ -84,16 +84,6 @@ True [[autodoc]] huggingface_hub.utils.enable_progress_bars -## HTTP 백엔드 구성[[huggingface_hub.configure_http_backend]] - -일부 환경에서는 HTTP 호출이 이루어지는 방식을 구성할 수 있습니다. 예를 들어, 프록시를 사용하는 경우가 그렇습니다. `huggingface_hub`는 [`configure_http_backend`]를 사용하여 전역적으로 이를 구성할 수 있게 합니다. 그러면 Hub로의 모든 요청이 사용자가 설정한 설정을 사용합니다. 내부적으로 `huggingface_hub`는 `requests.Session`을 사용하므로 사용 가능한 매개변수에 대해 자세히 알아보려면 [requests 문서](https://requests.readthedocs.io/en/latest/user/advanced)를 참조하는 것이 좋습니다. - -`requests.Session`이 스레드 안전을 보장하지 않기 때문에 `huggingface_hub`는 스레드당 하나의 세션 인스턴스를 생성합니다. 세션을 사용하면 HTTP 호출 사이에 연결을 유지하고 최종적으로 시간을 절약할 수 있습니다. `huggingface_hub`를 서드 파티 라이브러리에 통합하고 사용자 지정 호출을 Hub로 만들려는 경우, [`get_session`]을 사용하여 사용자가 구성한 세션을 가져옵니다 (즉, 모든 `requests.get(...)` 호출을 `get_session().get(...)`으로 대체합니다). 
- -[[autodoc]] configure_http_backend - -[[autodoc]] get_session - ## HTTP 오류 다루기[[handle-http-errors]] diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index f8937a0580..dd2a6ee616 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -516,7 +516,7 @@ "HfHubAsyncTransport", "HfHubTransport", "cached_assets_path", - "close_client", + "close_session", "dump_environment_info", "get_async_session", "get_session", @@ -815,7 +815,7 @@ "cancel_access_request", "cancel_job", "change_discussion_status", - "close_client", + "close_session", "comment_discussion", "create_branch", "create_collection", @@ -1518,7 +1518,7 @@ def __dir__(): HfHubAsyncTransport, # noqa: F401 HfHubTransport, # noqa: F401 cached_assets_path, # noqa: F401 - close_client, # noqa: F401 + close_session, # noqa: F401 dump_environment_info, # noqa: F401 get_async_session, # noqa: F401 get_session, # noqa: F401 diff --git a/src/huggingface_hub/utils/__init__.py b/src/huggingface_hub/utils/__init__.py index 6fc8c0ed7e..1b2eccdafc 100644 --- a/src/huggingface_hub/utils/__init__.py +++ b/src/huggingface_hub/utils/__init__.py @@ -55,7 +55,7 @@ CLIENT_FACTORY_T, HfHubAsyncTransport, HfHubTransport, - close_client, + close_session, fix_hf_endpoint_in_url, get_async_session, get_session, diff --git a/src/huggingface_hub/utils/_http.py b/src/huggingface_hub/utils/_http.py index 15484ec10d..c52fd6cc96 100644 --- a/src/huggingface_hub/utils/_http.py +++ b/src/huggingface_hub/utils/_http.py @@ -174,7 +174,7 @@ def set_client_factory(client_factory: CLIENT_FACTORY_T) -> None: """ global _GLOBAL_CLIENT_FACTORY with _CLIENT_LOCK: - close_client() + close_session() _GLOBAL_CLIENT_FACTORY = client_factory @@ -228,9 +228,9 @@ def get_async_session() -> httpx.AsyncClient: return _GLOBAL_ASYNC_CLIENT_FACTORY() -def close_client() -> None: +def close_session() -> None: """ - Close the global httpx.Client used by `huggingface_hub`. 
+ Close the global `httpx.Client` used by `huggingface_hub`. If a Client is closed, it will be recreated on the next call to [`get_client`]. @@ -250,7 +250,7 @@ def close_client() -> None: logger.warning(f"Error closing client: {e}") -atexit.register(close_client) +atexit.register(close_session) def _http_backoff_base( @@ -325,7 +325,7 @@ def _should_retry(response: httpx.Response) -> bool: logger.warning(f"'{err}' thrown while requesting {method} {url}") if isinstance(err, httpx.ConnectError): - close_client() # In case of SSLError it's best to close the shared httpx.Client objects + close_session() # In case of SSLError it's best to close the shared httpx.Client objects if nb_tries > max_retries: raise err From e4bcfdd8c71d7b80b584d0a5f8fbbb5a61f73f84 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Fri, 12 Sep 2025 17:26:14 +0200 Subject: [PATCH 2/6] http configuration docs --- docs/source/en/package_reference/utilities.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/en/package_reference/utilities.md b/docs/source/en/package_reference/utilities.md index 757a6613b2..c0ec92ed53 100644 --- a/docs/source/en/package_reference/utilities.md +++ b/docs/source/en/package_reference/utilities.md @@ -124,7 +124,8 @@ You can also enable or disable progress bars for specific groups. This allows yo -In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via `configure_http_backend`. Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default. +In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via ` + git push --set-upstream origin v1.0-some-more-docsend`. 
Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default. From c5081b70502f9330fcaf5c0119b36ea8f87e55d3 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Fri, 12 Sep 2025 17:33:48 +0200 Subject: [PATCH 3/6] refactored git_vs_http --- docs/source/en/concepts/git_vs_http.md | 53 ++++++-------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/docs/source/en/concepts/git_vs_http.md b/docs/source/en/concepts/git_vs_http.md index e6eb755af5..49d0370752 100644 --- a/docs/source/en/concepts/git_vs_http.md +++ b/docs/source/en/concepts/git_vs_http.md @@ -4,59 +4,28 @@ rendered properly in your Markdown viewer. # Git vs HTTP paradigm -The `huggingface_hub` library is a library for interacting with the Hugging Face Hub, which is a -collection of git-based repositories (models, datasets or Spaces). There are two main -ways to access the Hub using `huggingface_hub`. +The `huggingface_hub` library is a library for interacting with the Hugging Face Hub, which is a collection of git-based repositories (models, datasets or Spaces). There are two main ways to access the Hub using `huggingface_hub`. -The first approach, the so-called "git-based" approach, is led by the [`Repository`] class. -This method uses a wrapper around the `git` command with additional functions specifically -designed to interact with the Hub. The second option, called the "HTTP-based" approach, -involves making HTTP requests using the [`HfApi`] client. Let's examine the pros and cons -of each approach. +The first approach, the so-called "git-based" approach, relies on using standard `git` commands directly in a terminal. 
This method allows you to clone repositories, create commits, and push changes manually. The second option, called the "HTTP-based" approach, involves making HTTP requests using the [`HfApi`] client. Let's examine the pros and cons of each approach. -## Repository: the historical git-based approach +## Git: the historical CLI-based approach -At first, `huggingface_hub` was mostly built around the [`Repository`] class. It provides -Python wrappers for common `git` commands such as `"git add"`, `"git commit"`, `"git push"`, -`"git tag"`, `"git checkout"`, etc. +At first, most users interacted with the Hugging Face Hub using plain `git` commands such as `git clone`, `git add`, `git commit`, `git push`, `git tag`, or `git checkout`. -The library also helps with setting credentials and tracking large files, which are often -used in machine learning repositories. Additionally, the library allows you to execute its -methods in the background, making it useful for uploading data during training. +This approach lets you work with a full local copy of the repository on your machine, just like in traditional software development. This can be an advantage when you need offline access or want to work with the full history of a repository. However, it also comes with downsides: you are responsible for keeping the repository up-to-date locally, handling credentials, and managing large files (via `git-lfs`), which can become cumbersome when working with large machine learning models or datasets. -The main advantage of using a [`Repository`] is that it allows you to maintain a local -copy of the entire repository on your machine. This can also be a disadvantage as -it requires you to constantly update and maintain this local copy. This is similar to -traditional software development where each developer maintains their own local copy and -pushes changes when working on a feature. 
However, in the context of machine learning, -this may not always be necessary as users may only need to download weights for inference -or convert weights from one format to another without the need to clone the entire -repository. - - - -[`Repository`] is now deprecated in favor of the http-based alternatives. Given its large adoption in legacy code, the complete removal of [`Repository`] will only happen in release `v1.0`. - - +In many machine learning workflows, you may only need to download a few files for inference or convert weights without needing to clone the entire repository. In such cases, using `git` can be overkill and introduce unnecessary complexity. ## HfApi: a flexible and convenient HTTP client -The [`HfApi`] class was developed to provide an alternative to local git repositories, which -can be cumbersome to maintain, especially when dealing with large models or datasets. The -[`HfApi`] class offers the same functionality as git-based approaches, such as downloading -and pushing files and creating branches and tags, but without the need for a local folder -that needs to be kept in sync. +The [`HfApi`] class was developed to provide an alternative to using local git repositories, which can be cumbersome to maintain, especially when dealing with large models or datasets. The [`HfApi`] class offers the same functionality as git-based workflows, such as downloading and pushing files and creating branches and tags, but without the need for a local folder that needs to be kept in sync. -In addition to the functionalities already provided by `git`, the [`HfApi`] class offers -additional features, such as the ability to manage repos, download files using caching for -efficient reuse, search the Hub for repos and metadata, access community features such as -discussions, PRs, and comments, and configure Spaces hardware and secrets. 
+In addition to the functionalities already provided by `git`, the [`HfApi`] class offers additional features, such as the ability to manage repos, download files using caching for efficient reuse, search the Hub for repos and metadata, access community features such as discussions, PRs, and comments, and configure Spaces hardware and secrets. ## What should I use ? And when ? -Overall, the **HTTP-based approach is the recommended way to use** `huggingface_hub` -in all cases. [`HfApi`] allows to pull and push changes, work with PRs, tags and branches, interact with discussions and much more. Since the `0.16` release, the http-based methods can also run in the background, which was the last major advantage of the [`Repository`] class. +Overall, the **HTTP-based approach is the recommended way to use** `huggingface_hub` in all cases. [`HfApi`] allows you to pull and push changes, work with PRs, tags and branches, interact with discussions and much more. -However, not all git commands are available through [`HfApi`]. Some may never be implemented, but we are always trying to improve and close the gap. If you don't see your use case covered, please open [an issue on Github](https://github.com/huggingface/huggingface_hub)! We welcome feedback to help build the 🤗 ecosystem with and for our users. +However, not all git commands are available through [`HfApi`]. Some may never be implemented, but we are always trying to improve and close the gap. If you don't see your use case covered, please open [an issue on GitHub](https://github.com/huggingface/huggingface_hub)! We welcome feedback to help build the HF ecosystem with and for our users. -This preference of the http-based [`HfApi`] over the git-based [`Repository`] does not mean that git versioning will disappear from the Hugging Face Hub anytime soon. It will always be possible to use `git` commands locally in workflows where it makes sense. 
+This preference for the HTTP-based [`HfApi`] over direct `git` commands does not mean that git versioning will disappear from the Hugging Face Hub anytime soon. It will always be possible to use `git` locally in workflows where it makes sense. \ No newline at end of file From b22263bad3978d50f2f4ccae759b07518fa22e44 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Mon, 15 Sep 2025 14:47:41 +0200 Subject: [PATCH 4/6] fix import --- docs/source/en/package_reference/utilities.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/package_reference/utilities.md b/docs/source/en/package_reference/utilities.md index c0ec92ed53..d87bd6d0af 100644 --- a/docs/source/en/package_reference/utilities.md +++ b/docs/source/en/package_reference/utilities.md @@ -296,4 +296,4 @@ validated. Not exactly a validator, but ran as well. -[[autodoc]] utils.smoothly_deprecate_legacy_arguments +[[autodoc]] utils._validators.smoothly_deprecate_legacy_arguments From a10d76bb4fa5d7391e3a1998c0b39da4a8cfeb08 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Mon, 15 Sep 2025 14:59:40 +0200 Subject: [PATCH 5/6] fix docs? 
--- docs/source/ko/_toctree.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml index 0a82cd72db..e67d69af38 100644 --- a/docs/source/ko/_toctree.yml +++ b/docs/source/ko/_toctree.yml @@ -18,8 +18,6 @@ title: 명령줄 인터페이스(CLI) 사용하기 - local: guides/hf_file_system title: Hf파일시스템 - - local: guides/repository - title: 리포지토리 - local: guides/search title: Hub에서 검색하기 - local: guides/inference From 003aaa1757c9a6b3f4b4ea997c528593c468401f Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 17 Sep 2025 11:58:24 +0200 Subject: [PATCH 6/6] Update docs/source/en/package_reference/utilities.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: célina --- docs/source/en/package_reference/utilities.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/en/package_reference/utilities.md b/docs/source/en/package_reference/utilities.md index d87bd6d0af..2b66c260d1 100644 --- a/docs/source/en/package_reference/utilities.md +++ b/docs/source/en/package_reference/utilities.md @@ -124,8 +124,7 @@ You can also enable or disable progress bars for specific groups. This allows yo -In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via ` - git push --set-upstream origin v1.0-some-more-docsend`. Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default. +In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via `configure_http_backend`. Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. 
You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default.