Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions docs/source/main_classes/pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ GPU. If it doesn't, don't hesitate to create an issue.
import datasets
from transformers import pipeline
from transformers.pipelines.base import KeyDataset
import tqdm
from tqdm.auto import tqdm

pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
dataset = datasets.load_dataset("superb", name="asr", split="test")

# KeyDataset (only *pt*) will simply return the item in the dict returned by the dataset item
# as we're not interested in the *target* part of the dataset.
for out in tqdm.tqdm(pipe(KeyDataset(dataset, "file"))):
for out in tqdm(pipe(KeyDataset(dataset, "file"))):
print(out)
# {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
# {"text": ....}
Expand Down Expand Up @@ -130,7 +130,6 @@ whenever the pipeline uses its streaming ability (so when passing lists or `Data
from transformers import pipeline
from transformers.pipelines.base import KeyDataset
import datasets
import tqdm

dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
pipe = pipeline("text-classification", device=0)
Expand All @@ -153,8 +152,7 @@ Example where it's mostly a speedup:
```python
from transformers import pipeline
from torch.utils.data import Dataset
import tqdm

from tqdm.auto import tqdm

pipe = pipeline("text-classification", device=0)

Expand All @@ -172,7 +170,7 @@ dataset = MyDataset()
for batch_size in [1, 8, 64, 256]:
print("-" * 30)
print(f"Streaming batch_size={batch_size}")
for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
pass
```

Expand Down Expand Up @@ -228,7 +226,7 @@ Streaming batch_size=256
0%| | 0/1000 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/home/nicolas/src/transformers/test.py", line 42, in <module>
for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
for out in tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
....
q = q / math.sqrt(dim_per_head) # (bs, n_heads, q_length, dim_per_head)
RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
Expand Down