Commit 3e9fdcf

[DOC] fix doc examples for bart-like models (#15093)
* fix doc examples
* remove double colons
1 parent 61d18ae commit 3e9fdcf

16 files changed (+291, -216 lines)
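The change is the same across all 16 files: each RST literal block (introduced by a double colon) becomes a Markdown-fenced ```python block with one statement per `>>>` prompt, and single quotes become double quotes. One plausible motivation, not stated in the commit itself, is that one-statement-per-prompt blocks can be parsed and executed mechanically; a minimal sketch using only Python's standard doctest module (illustrative, not the repository's actual doc-testing utility):

```python
import doctest

# A docstring example in the new style: one statement per ">>>" prompt,
# with the expected output on the following line.
example = '''
>>> text = "My friends are cool but they eat too many carbs."
>>> len(text.split())
10
'''

# Parse and run the prompts; the old wrapped style, with several
# ">>> a >>> b" statements run together on one line, cannot be parsed this way.
parser = doctest.DocTestParser()
test = parser.get_doctest(example, globs={}, name="example", filename=None, lineno=0)
runner = doctest.DocTestRunner()
runner.run(test)
print(runner.summarize())  # TestResults(failed=0, attempted=2)
```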

src/transformers/models/bart/modeling_bart.py

Lines changed: 25 additions & 18 deletions
@@ -534,33 +534,40 @@ def __init_subclass__(self):
     """
 
 BART_GENERATION_EXAMPLE = r"""
-    Summarization example::
+    Summarization example:
 
-        >>> from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
+    ```python
+    >>> from transformers import BartTokenizer, BartForConditionalGeneration
 
-        >>> model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn') >>> tokenizer =
-        BartTokenizer.from_pretrained('facebook/bart-large-cnn')
+    >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+    >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
 
-        >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
-        tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
+    >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+    >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")
 
-        >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
-        early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
-        clean_up_tokenization_spaces=False) for g in summary_ids])
+    >>> # Generate Summary
+    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    ```
 
-    Mask filling example::
+    Mask filling example:
 
-        >>> from transformers import BartTokenizer, BartForConditionalGeneration >>> tokenizer =
-        BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
-        carbs."
+    ```python
+    >>> from transformers import BartTokenizer, BartForConditionalGeneration
 
-        >>> model = BartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
-        tokenizer([TXT], return_tensors='pt')['input_ids'] >>> logits = model(input_ids).logits
+    >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
+    >>> TXT = "My friends are <mask> but they eat too many carbs."
 
-        >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0,
-        masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5)
+    >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
+    >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
+    >>> logits = model(input_ids).logits
 
-        >>> tokenizer.decode(predictions).split()
+    >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
+    >>> probs = logits[0, masked_index].softmax(dim=0)
+    >>> values, predictions = probs.topk(5)
+
+    >>> tokenizer.decode(predictions).split()
+    ```
 """
 
 BART_INPUTS_DOCSTRING = r"""
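The mask-filling pattern in the new BART example locates the `<mask>` position by id and takes the top-5 vocabulary entries at that slot. A framework-neutral sketch of the same top-k step in plain PyTorch, using made-up tensors rather than real model logits:

```python
import torch

# Hypothetical logits for a 6-token sequence over a 10-entry vocabulary.
logits = torch.randn(1, 6, 10)
masked_index = 3  # position of the <mask> token, found via tokenizer.mask_token_id above

probs = logits[0, masked_index].softmax(dim=0)
values, predictions = probs.topk(5)  # the 5 most likely vocabulary ids for the masked slot
print(predictions.tolist())
```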

src/transformers/models/bart/modeling_flax_bart.py

Lines changed: 25 additions & 17 deletions
@@ -1506,32 +1506,40 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 FLAX_BART_CONDITIONAL_GENERATION_DOCSTRING = """
     Returns:
 
-    Summarization example::
+    Summarization example:
 
-        >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
+    ```python
+    >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
+
+    >>> model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+    >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
 
-        >>> model = FlaxBartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn') >>> tokenizer =
-        BartTokenizer.from_pretrained('facebook/bart-large-cnn')
+    >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+    >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="np")
 
-        >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
-        tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='jax')
+    >>> # Generate Summary
+    >>> summary_ids = model.generate(inputs["input_ids"]).sequences
+    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    ```
 
-        >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']).sequences >>>
-        print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    Mask filling example:
 
-    Mask filling example::
+    ```python
+    >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
 
-        >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration >>> tokenizer =
-        BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
-        carbs."
+    >>> model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large")
+    >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
 
-        >>> model = FlaxBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
-        tokenizer([TXT], return_tensors='jax')['input_ids'] >>> logits = model(input_ids).logits
+    >>> TXT = "My friends are <mask> but they eat too many carbs."
+    >>> input_ids = tokenizer([TXT], return_tensors="jax")["input_ids"]
 
-        >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item() >>> probs =
-        jax.nn.softmax(logits[0, masked_index], axis=0) >>> values, predictions = jax.lax.top_k(probs)
+    >>> logits = model(input_ids).logits
+    >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item()
+    >>> probs = jax.nn.softmax(logits[0, masked_index], axis=0)
+    >>> values, predictions = jax.lax.top_k(probs)
 
-        >>> tokenizer.decode(predictions).split()
+    >>> tokenizer.decode(predictions).split()
+    ```
 """
 
 overwrite_call_docstring(
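A side note on the Flax mask-filling example: it calls `jax.nn.softmax` and `jax.lax.top_k` but imports only from `transformers`, and `jax.lax.top_k` takes a required `k` argument. A self-contained sketch of that final step, with an illustrative score vector instead of real model output:

```python
import jax
import jax.numpy as jnp

# Hypothetical logits for one masked position over a 10-entry vocabulary.
logits = jnp.array([0.1, 2.0, -1.0, 0.5, 3.2, 0.0, 1.1, -0.3, 0.7, 2.5])
probs = jax.nn.softmax(logits, axis=0)

# top_k returns the k largest values and their indices; k must be given explicitly.
values, predictions = jax.lax.top_k(probs, k=5)
print(predictions)
```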

src/transformers/models/bart/modeling_tf_bart.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -510,29 +510,36 @@ def serving(self, inputs):
510510

511511

512512
BART_GENERATION_EXAMPLE = r"""
513-
Summarization example::
513+
Summarization example:
514514
515-
>>> from transformers import BartTokenizer, TFBartForConditionalGeneration, BartConfig
515+
```python
516+
>>> from transformers import BartTokenizer, TFBartForConditionalGeneration
516517
517-
>>> model = TFBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> tokenizer =
518-
BartTokenizer.from_pretrained('facebook/bart-large')
518+
>>> model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")
519+
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
519520
520-
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
521-
tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='tf')
521+
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
522+
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="tf")
522523
523-
>>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
524-
early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
525-
clean_up_tokenization_spaces=False) for g in summary_ids])
524+
>>> # Generate Summary
525+
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
526+
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
527+
```
526528
527-
Mask filling example::
529+
Mask filling example:
528530
529-
>>> from transformers import BartTokenizer, TFBartForConditionalGeneration >>> tokenizer =
530-
BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
531-
carbs."
531+
```python
532+
>>> from transformers import BartTokenizer, TFBartForConditionalGeneration
532533
533-
>>> model = TFBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
534-
tokenizer([TXT], return_tensors='tf')['input_ids'] >>> logits = model(input_ids).logits >>> probs =
535-
tf.nn.softmax(logits[0]) >>> # probs[5] is associated with the mask token
534+
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
535+
>>> TXT = "My friends are <mask> but they eat too many carbs."
536+
537+
>>> model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")
538+
>>> input_ids = tokenizer([TXT], return_tensors="tf")["input_ids"]
539+
>>> logits = model(input_ids).logits
540+
>>> probs = tf.nn.softmax(logits[0])
541+
>>> # probs[5] is associated with the mask token
542+
```
536543
"""
537544

538545

src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py

Lines changed: 11 additions & 9 deletions
@@ -1619,19 +1619,21 @@ def dummy_inputs(self):
     """
 
 BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r"""
-    Summarization example::
+    Summarization example:
 
-        >>> from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration, BigBirdPegasusConfig
+    ```python
+    >>> from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration
 
-        >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained('google/bigbird-pegasus-large-arxiv') >>>
-        tokenizer = PegasusTokenizer.from_pretrained('google/bigbird-pegasus-large-arxiv')
+    >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv")
+    >>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")
 
-        >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
-        tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors='pt', truncation=True)
+    >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+    >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True)
 
-        >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
-        early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
-        clean_up_tokenization_spaces=False) for g in summary_ids])
+    >>> # Generate Summary
+    >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+    >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+    ```
 """
 
 BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r"""

src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py

Lines changed: 2 additions & 2 deletions
@@ -1482,7 +1482,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 FLAX_BLENDERBOT_SMALL_CONDITIONAL_GENERATION_DOCSTRING = """
     Returns:
 
-    Summarization example::
+    Summarization example:
 
     >>> from transformers import BlenderbotSmallTokenizer, FlaxBlenderbotSmallForConditionalGeneration
@@ -1495,7 +1495,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
     >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']).sequences >>>
     print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
 
-    Mask filling example::
+    Mask filling example:
 
     >>> from transformers import BlenderbotSmallTokenizer, FlaxBlenderbotSmallForConditionalGeneration >>>
     tokenizer = BlenderbotSmallTokenizer.from_pretrained('facebook/blenderbot_small-90M') >>> TXT = "My friends are

src/transformers/models/fsmt/modeling_fsmt.py

Lines changed: 13 additions & 10 deletions
@@ -199,16 +199,19 @@
 FSMT_GENERATION_EXAMPLE = r"""
     Translation example::
 
-        from transformers import FSMTTokenizer, FSMTForConditionalGeneration
-
-        mname = "facebook/wmt19-ru-en" model = FSMTForConditionalGeneration.from_pretrained(mname) tokenizer =
-        FSMTTokenizer.from_pretrained(mname)
-
-        src_text = "Машинное обучение - это здорово, не так ли?" input_ids = tokenizer.encode(src_text,
-        return_tensors='pt') outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3) for i, output in
-        enumerate(outputs):
-            decoded = tokenizer.decode(output, skip_special_tokens=True) print(f"{i}: {decoded})
-        # 1: Machine learning is great, isn't it? ...
+    ```python
+    >>> from transformers import FSMTTokenizer, FSMTForConditionalGeneration
+
+    >>> mname = "facebook/wmt19-ru-en"
+    >>> model = FSMTForConditionalGeneration.from_pretrained(mname)
+    >>> tokenizer = FSMTTokenizer.from_pretrained(mname)
+
+    >>> src_text = "Машинное обучение - это здорово, не так ли?"
+    >>> input_ids = tokenizer(src_text, return_tensors="pt")
+    >>> outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3)
+    >>> tokenizer.decode(outputs[0], skip_special_tokens=True)
+    "Machine learning is great, isn't it?"
+    ```
 
 """
 
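For reference, a script-style variant of the new translation example; a sketch assuming the `facebook/wmt19-ru-en` checkpoint is available. It unpacks the tokenizer output with `**` instead of passing the `BatchEncoding` positionally, which is the library's usual calling convention:

```python
from transformers import FSMTForConditionalGeneration, FSMTTokenizer

mname = "facebook/wmt19-ru-en"
tokenizer = FSMTTokenizer.from_pretrained(mname)
model = FSMTForConditionalGeneration.from_pretrained(mname)

src_text = "Машинное обучение - это здорово, не так ли?"
inputs = tokenizer(src_text, return_tensors="pt")

# Unpack input_ids/attention_mask rather than passing the BatchEncoding itself.
outputs = model.generate(**inputs, num_beams=5, num_return_sequences=3)
for i, output in enumerate(outputs):
    print(i, tokenizer.decode(output, skip_special_tokens=True))
```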

src/transformers/models/led/modeling_led.py

Lines changed: 35 additions & 30 deletions
@@ -1454,36 +1454,41 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
     """
 
 LED_GENERATION_EXAMPLE = r"""
-    Summarization example::
-
-        >>> import torch >>> from transformers import LEDTokenizer, LEDForConditionalGeneration
-
-        >>> model = LEDForConditionalGeneration.from_pretrained('allenai/led-large-16384-arxiv') >>> tokenizer =
-        LEDTokenizer.from_pretrained('allenai/led-large-16384-arxiv')
-
-        >>> ARTICLE_TO_SUMMARIZE = '''Transformers (Vaswani et al., 2017) have achieved state-of-the-art ... results in
-        a wide range of natural language tasks including generative ... language modeling (Dai et al., 2019; Radford et
-        al., 2019) and discriminative ... language understanding (Devlin et al., 2019). This success is partly due to
-        ... the self-attention component which enables the network to capture contextual ... information from the
-        entire sequence. While powerful, the memory and computational ... requirements of self-attention grow
-        quadratically with sequence length, making ... it infeasible (or very expensive) to process long sequences. ...
-        ... To address this limitation, we present Longformer, a modified Transformer ... architecture with a
-        self-attention operation that scales linearly with the ... sequence length, making it versatile for processing
-        long documents (Fig 1). This ... is an advantage for natural language tasks such as long document
-        classification, ... question answering (QA), and coreference resolution, where existing approaches ...
-        partition or shorten the long context into smaller sequences that fall within the ... typical 512 token limit
-        of BERT-style pretrained models. Such partitioning could ... potentially result in loss of important
-        cross-partition information, and to ... mitigate this problem, existing methods often rely on complex
-        architectures to ... address such interactions. On the other hand, our proposed Longformer is able to ... build
-        contextual representations of the entire context using multiple layers of ... attention, reducing the need for
-        task-specific architectures.''' >>> inputs = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors='pt')
-
-        >>> # Global attention on the first token (cf. Beltagy et al. 2020) >>> global_attention_mask =
-        torch.zeros_like(inputs) >>> global_attention_mask[:, 0] = 1
-
-        >>> # Generate Summary >>> summary_ids = model.generate(inputs, global_attention_mask=global_attention_mask,
-        ... num_beams=3, max_length=32, early_stopping=True) >>> print(tokenizer.decode(summary_ids[0],
-        skip_special_tokens=True, clean_up_tokenization_spaces=True))
+    Summarization example:
+
+    ```python
+    >>> import torch
+    >>> from transformers import LEDTokenizer, LEDForConditionalGeneration
+
+    >>> model = LEDForConditionalGeneration.from_pretrained("allenai/led-large-16384-arxiv")
+    >>> tokenizer = LEDTokenizer.from_pretrained("allenai/led-large-16384-arxiv")
+
+    >>> ARTICLE_TO_SUMMARIZE = '''Transformers (Vaswani et al., 2017) have achieved state-of-the-art
+    ... results in a wide range of natural language tasks including generative language modeling
+    ... (Dai et al., 2019; Radford et al., 2019) and discriminative ... language understanding (Devlin et al., 2019).
+    ... This success is partly due to the self-attention component which enables the network to capture contextual
+    ... information from the entire sequence. While powerful, the memory and computational requirements of
+    ... self-attention grow quadratically with sequence length, making it infeasible (or very expensive) to
+    ... process long sequences. To address this limitation, we present Longformer, a modified Transformer
+    ... architecture with a self-attention operation that scales linearly with the sequence length, making it
+    ... versatile for processing long documents (Fig 1). This is an advantage for natural language tasks such as
+    ... long document classification, question answering (QA), and coreference resolution, where existing approaches
+    ... partition or shorten the long context into smaller sequences that fall within the typical 512 token limit
+    ... of BERT-style pretrained models. Such partitioning could potentially result in loss of important
+    ... cross-partition information, and to mitigate this problem, existing methods often rely on complex
+    ... architectures to address such interactions. On the other hand, our proposed Longformer is able to build
+    ... contextual representations of the entire context using multiple layers of attention, reducing the need for
+    ... task-specific architectures.'''
+    >>> inputs = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors="pt")
+
+    >>> # Global attention on the first token (cf. Beltagy et al. 2020)
+    >>> global_attention_mask = torch.zeros_like(inputs)
+    >>> global_attention_mask[:, 0] = 1
+
+    >>> # Generate Summary
+    >>> summary_ids = model.generate(inputs, global_attention_mask=global_attention_mask, num_beams=3, max_length=32)
+    >>> print(tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True))
+    ```
 """
 
 LED_INPUTS_DOCSTRING = r"""
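In the LED example above, `global_attention_mask` follows the Longformer convention: 0 marks tokens with local attention and 1 marks tokens that attend globally. A tiny sketch of building such a mask, with hypothetical token ids:

```python
import torch

input_ids = torch.tensor([[0, 713, 16, 10, 1296, 2]])  # hypothetical token ids

# 0 = local attention, 1 = global attention (Longformer/LED convention).
global_attention_mask = torch.zeros_like(input_ids)
global_attention_mask[:, 0] = 1  # first token attends globally, as in the example
print(global_attention_mask)
```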

src/transformers/models/m2m_100/modeling_m2m_100.py

Lines changed: 10 additions & 8 deletions
@@ -566,17 +566,19 @@ def _set_gradient_checkpointing(self, module, value=False):
 M2M_100_GENERATION_EXAMPLE = r"""
     Translation example::
 
-        >>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
+    ```python
+    >>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
 
-        >>> model = M2M100ForConditionalGeneration.from_pretrained('facebook/m2m100_418M') >>> tokenizer =
-        M2M100Tokenizer.from_pretrained('facebook/m2m100_418M')
+    >>> model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
+    >>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
 
-        >>> text_to_translate = "Life is like a box of chocolates" >>> model_inputs = tokenizer(text_to_translate,
-        return_tensors='pt')
+    >>> text_to_translate = "Life is like a box of chocolates"
+    >>> model_inputs = tokenizer(text_to_translate, return_tensors="pt")
 
-        >>> # translate to French >>> gen_tokens = model.generate( **model_inputs,
-        forced_bos_token_id=tokenizer.get_lang_id("fr")) >>> print(tokenizer.batch_decode(gen_tokens,
-        skip_special_tokens=True))
+    >>> # translate to French
+    >>> gen_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id("fr"))
+    >>> print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
+    ```
 """
 
 M2M_100_INPUTS_DOCSTRING = r"""
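The M2M-100 example relies on `forced_bos_token_id` because the model selects the target language through the first generated token. A sketch extending the example above to several targets; the language codes in the loop are illustrative:

```python
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

model_inputs = tokenizer("Life is like a box of chocolates", return_tensors="pt")

# The forced first (BOS) token picks the output language, so the same
# encoded inputs can be decoded into different languages.
for lang in ("fr", "de", "ro"):
    gen_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id(lang))
    print(lang, tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
```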
