77import copy
88import io
99import json
10+ import multiprocessing
1011import os
1112import posixpath
1213import re
1920from functools import partial
2021from pathlib import Path
2122from typing import Optional , TypeVar , Union
23+ from unittest .mock import patch
2224from urllib .parse import urljoin , urlparse
2325
2426import fsspec
@@ -319,6 +321,12 @@ def fsspec_head(url, storage_options=None):
319321 return fs .info (paths [0 ])
320322
321323
def stack_multiprocessing_download_progress_bars():
    """Return a patcher that turns on stacked download progress bars.

    The returned object is the result of ``unittest.mock.patch.dict`` applied
    to ``os.environ``. While it is active (e.g. inside a ``with`` block), the
    environment variable
    ``HF_DATASETS_STACK_MULTIPROCESSING_DOWNLOAD_PROGRESS_BARS`` is set to
    ``"1"``; the previous environment is restored when it exits.

    The download helpers in this module read that variable to decide whether
    to give each progress bar an explicit ``position`` taken from the current
    multiprocessing worker's identity.

    Returns:
        The ``patch.dict`` patcher for ``os.environ``.
    """
    # An environment variable is used (rather than an in-memory flag) because
    # the download may happen in a subprocess.
    env_overrides = {"HF_DATASETS_STACK_MULTIPROCESSING_DOWNLOAD_PROGRESS_BARS": "1"}
    return patch.dict(os.environ, env_overrides)
328+
329+
322330class TqdmCallback (fsspec .callbacks .TqdmCallback ):
323331 def __init__ (self , tqdm_kwargs = None , * args , ** kwargs ):
324332 super ().__init__ (tqdm_kwargs , * args , ** kwargs )
@@ -335,6 +343,10 @@ def fsspec_get(url, temp_file, storage_options=None, desc=None):
335343 "desc" : desc or "Downloading" ,
336344 "unit" : "B" ,
337345 "unit_scale" : True ,
346+ "position" : multiprocessing .current_process ()._identity [- 1 ] # contains the ranks of subprocesses
347+ if os .environ .get ("HF_DATASETS_STACK_MULTIPROCESSING_DOWNLOAD_PROGRESS_BARS" ) == "1"
348+ and multiprocessing .current_process ()._identity
349+ else None ,
338350 }
339351 )
340352 fs .get_file (paths [0 ], temp_file .name , callback = callback )
@@ -389,6 +401,10 @@ def http_get(
389401 total = total ,
390402 initial = resume_size ,
391403 desc = desc or "Downloading" ,
404+ position = multiprocessing .current_process ()._identity [- 1 ] # contains the ranks of subprocesses
405+ if os .environ .get ("HF_DATASETS_STACK_MULTIPROCESSING_DOWNLOAD_PROGRESS_BARS" ) == "1"
406+ and multiprocessing .current_process ()._identity
407+ else None ,
392408 ) as progress :
393409 for chunk in response .iter_content (chunk_size = 1024 ):
394410 progress .update (len (chunk ))
0 commit comments