From 246a6be71eb5c6d8048e3dea00fa342261aa300b Mon Sep 17 00:00:00 2001
From: Santiago Castro
Date: Fri, 24 Dec 2021 18:40:42 +0200
Subject: [PATCH] Use tqdm.auto in Pipeline docs

It's better for e.g. notebooks.
---
 docs/source/main_classes/pipelines.mdx | 53 ++++++++++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/docs/source/main_classes/pipelines.mdx b/docs/source/main_classes/pipelines.mdx
index a955cdef7054..52c2b15ab87a 100644
--- a/docs/source/main_classes/pipelines.mdx
+++ b/docs/source/main_classes/pipelines.mdx
@@ -79,14 +79,14 @@ GPU. If it doesn't don't hesitate to create an issue.
 import datasets
 from transformers import pipeline
 from transformers.pipelines.base import KeyDataset
-import tqdm
+from tqdm.auto import tqdm
 
 pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
 dataset = datasets.load_dataset("superb", name="asr", split="test")
 
 # KeyDataset (only *pt*) will simply return the item in the dict returned by the dataset item
 # as we're not interested in the *target* part of the dataset.
-for out in tqdm.tqdm(pipe(KeyDataset(dataset, "file"))):
+for out in tqdm(pipe(KeyDataset(dataset, "file"))):
     print(out)
     # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
     # {"text": ....}
@@ -101,10 +101,9 @@ All pipelines (except *zero-shot-classification* and *question-answering* curren
 whenever the pipeline uses its streaming ability (so when passing lists or `Dataset`).
 
 ```python
-from transformers import pipeline 
+from transformers import pipeline
 from transformers.pipelines.base import KeyDataset
 import datasets
-import tqdm
 
 dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
 pipe = pipeline("text-classification", device=0)
@@ -125,28 +124,28 @@ Example where it's most a speedup:
 
 ```python
-from transformers import pipeline 
-from torch.utils.data import Dataset 
-import tqdm
+from transformers import pipeline
+from torch.utils.data import Dataset
+from tqdm.auto import tqdm
 
-pipe = pipeline("text-classification", device=0) 
+pipe = pipeline("text-classification", device=0)
 
 
-class MyDataset(Dataset): 
-    def __len__(self): 
-        return 5000 
+class MyDataset(Dataset):
+    def __len__(self):
+        return 5000
 
-    def __getitem__(self, i): 
-        return "This is a test" 
+    def __getitem__(self, i):
+        return "This is a test"
 
 
-dataset = MyDataset() 
+dataset = MyDataset()
 
 for batch_size in [1, 8, 64, 256]:
-    print("-" * 30) 
-    print(f"Streaming batch_size={batch_size}") 
-    for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
+    print("-" * 30)
+    print(f"Streaming batch_size={batch_size}")
+    for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
         pass
 ```
@@ -170,15 +169,15 @@ Streaming batch_size=256
 Example where it's most a slowdown:
 
 ```python
-class MyDataset(Dataset): 
-    def __len__(self): 
-        return 5000 
-
-    def __getitem__(self, i): 
-        if i % 64 == 0: 
-            n = 100 
-        else: 
-            n = 1 
+class MyDataset(Dataset):
+    def __len__(self):
+        return 5000
+
+    def __getitem__(self, i):
+        if i % 64 == 0:
+            n = 100
+        else:
+            n = 1
         return "This is a test" * n
 ```
@@ -202,7 +201,7 @@ Streaming batch_size=256
  0%|          | 0/1000 [00:00<?, ?it/s]
 Traceback (most recent call last):
-    for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
+    for out in tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
 ....
     q = q / math.sqrt(dim_per_head)  # (bs, n_heads, q_length, dim_per_head)
 RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
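
For reference, a minimal runnable sketch of the pattern the docs adopt after this patch (assumes `transformers`, `torch`, and `tqdm` are installed; `device=0` presumes a GPU is available, and the default text-classification model is downloaded on first use):

```python
# Sketch of the post-patch pattern, not part of the patch itself.
# tqdm.auto renders a widget-based bar under Jupyter/Colab and falls
# back to the plain-text bar in a terminal.
from torch.utils.data import Dataset
from tqdm.auto import tqdm

from transformers import pipeline


class MyDataset(Dataset):
    """5000 identical short strings, as in the doc's speedup example."""

    def __len__(self):
        return 5000

    def __getitem__(self, i):
        return "This is a test"


pipe = pipeline("text-classification", device=0)  # device=-1 to stay on CPU
dataset = MyDataset()

# The pipeline streams results lazily, so pass total= for tqdm to size the bar.
for out in tqdm(pipe(dataset, batch_size=8), total=len(dataset)):
    pass  # each `out` is a dict like {"label": ..., "score": ...}
```

The only change of substance in the patch is the import: `tqdm.auto` picks the appropriate frontend automatically, whereas `import tqdm` plus `tqdm.tqdm(...)` always emits the plain-text bar, which renders poorly in notebooks.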