39 changes: 23 additions & 16 deletions src/transformers/models/bart/modeling_bart.py
@@ -536,31 +536,38 @@ def __init_subclass__(self):
BART_GENERATION_EXAMPLE = r"""
Summarization example::

>>> from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
```python
>>> from transformers import BartTokenizer, BartForConditionalGeneration

>>> model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn') >>> tokenizer =
BartTokenizer.from_pretrained('facebook/bart-large-cnn')
>>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")

>>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
clean_up_tokenization_spaces=False) for g in summary_ids])
>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
```

Mask filling example::

>>> from transformers import BartTokenizer, BartForConditionalGeneration >>> tokenizer =
BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
carbs."
```python
>>> from transformers import BartTokenizer, BartForConditionalGeneration

>>> model = BartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
tokenizer([TXT], return_tensors='pt')['input_ids'] >>> logits = model(input_ids).logits
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
>>> TXT = "My friends are <mask> but they eat too many carbs."

>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0,
masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5)
>>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
>>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
>>> logits = model(input_ids).logits

>>> tokenizer.decode(predictions).split()
>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
>>> probs = logits[0, masked_index].softmax(dim=0)
>>> values, predictions = probs.topk(5)

>>> tokenizer.decode(predictions).split()
```
"""

BART_INPUTS_DOCSTRING = r"""
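The updated mask-filling doctest depends on the input containing exactly one `<mask>` token, since `.nonzero().item()` raises for any other count. A self-contained sketch of the same pattern, with an extra illustrative step (not in the docstring) that splices the top candidate back into the sentence:

```python
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")

text = "My friends are <mask> but they eat too many carbs."
input_ids = tokenizer([text], return_tensors="pt")["input_ids"]
logits = model(input_ids).logits

# .item() assumes exactly one <mask>; it raises if there are zero or several.
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
probs = logits[0, masked_index].softmax(dim=0)
values, predictions = probs.topk(5)
print(tokenizer.decode(predictions).split())

# Illustrative follow-up: splice the top candidate back into the sentence.
input_ids[0, masked_index] = predictions[0]
print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
```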
38 changes: 23 additions & 15 deletions src/transformers/models/bart/modeling_flax_bart.py
@@ -1508,30 +1508,38 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):

Summarization example::

>>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
```python
>>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration

>>> model = FlaxBartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn') >>> tokenizer =
BartTokenizer.from_pretrained('facebook/bart-large-cnn')
>>> model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='jax')
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="np")

>>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']).sequences >>>
print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"]).sequences
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
```

Mask filling example::

>>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration >>> tokenizer =
BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
carbs."
```python
>>> import jax
>>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration

>>> model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large")
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")

>>> model = FlaxBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
tokenizer([TXT], return_tensors='jax')['input_ids'] >>> logits = model(input_ids).logits
>>> TXT = "My friends are <mask> but they eat too many carbs."
>>> input_ids = tokenizer([TXT], return_tensors="jax")["input_ids"]

>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item() >>> probs =
jax.nn.softmax(logits[0, masked_index], axis=0) >>> values, predictions = jax.lax.top_k(probs)
>>> logits = model(input_ids).logits
>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item()
>>> probs = jax.nn.softmax(logits[0, masked_index], axis=0)
>>> values, predictions = jax.lax.top_k(probs, k=5)

>>> tokenizer.decode(predictions).split()
>>> tokenizer.decode(predictions).split()
```
"""

overwrite_call_docstring(
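`jax.lax.top_k` requires the number of entries `k` as an explicit argument (hence `k=5` in the block above); a tiny standalone check of its behavior:

```python
import jax.numpy as jnp
from jax import lax

probs = jnp.array([0.1, 0.4, 0.2, 0.3])
values, indices = lax.top_k(probs, k=2)
print(values)   # [0.4 0.3]
print(indices)  # [1 3]
```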
35 changes: 21 additions & 14 deletions src/transformers/models/bart/modeling_tf_bart.py
@@ -512,27 +512,34 @@ def serving(self, inputs):
BART_GENERATION_EXAMPLE = r"""
Summarization example::

>>> from transformers import BartTokenizer, TFBartForConditionalGeneration, BartConfig
```python
>>> from transformers import BartTokenizer, TFBartForConditionalGeneration

>>> model = TFBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> tokenizer =
BartTokenizer.from_pretrained('facebook/bart-large')
>>> model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")

>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='tf')
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="tf")

>>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
clean_up_tokenization_spaces=False) for g in summary_ids])
>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
```

Mask filling example::

>>> from transformers import BartTokenizer, TFBartForConditionalGeneration >>> tokenizer =
BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
carbs."
```python
>>> import tensorflow as tf
>>> from transformers import BartTokenizer, TFBartForConditionalGeneration

>>> model = TFBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
tokenizer([TXT], return_tensors='tf')['input_ids'] >>> logits = model(input_ids).logits >>> probs =
tf.nn.softmax(logits[0]) >>> # probs[5] is associated with the mask token
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
>>> TXT = "My friends are <mask> but they eat too many carbs."

>>> model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")
>>> input_ids = tokenizer([TXT], return_tensors="tf")["input_ids"]
>>> logits = model(input_ids).logits
>>> probs = tf.nn.softmax(logits[0])
>>> # probs[5] is associated with the mask token
```
"""


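The TF block above hard-codes position 5 for the `<mask>` token; a hedged alternative sketch (mirroring the PyTorch variant, not part of the docstring) locates the position dynamically:

```python
import tensorflow as tf
from transformers import BartTokenizer, TFBartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")

TXT = "My friends are <mask> but they eat too many carbs."
input_ids = tokenizer([TXT], return_tensors="tf")["input_ids"]
logits = model(input_ids).logits

# Find the <mask> position instead of assuming it sits at index 5.
masked_index = int(tf.where(input_ids[0] == tokenizer.mask_token_id)[0, 0])
probs = tf.nn.softmax(logits[0, masked_index])
values, predictions = tf.math.top_k(probs, k=5)
print(tokenizer.decode(predictions.numpy()).split())
```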
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
@@ -1621,17 +1621,19 @@ def dummy_inputs(self):
BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r"""
Summarization example::

>>> from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration, BigBirdPegasusConfig
```python
>>> from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration

>>> model = BigBirdPegasusForConditionalGeneration.from_pretrained('google/bigbird-pegasus-large-arxiv') >>>
tokenizer = PegasusTokenizer.from_pretrained('google/bigbird-pegasus-large-arxiv')
>>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv")
>>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")

>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors='pt', truncation=True)
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True)

>>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
clean_up_tokenization_spaces=False) for g in summary_ids])
>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
```
"""

BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r"""
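`truncation=True` with `max_length=4096` caps the encoded article at the 4096-token window BigBird-Pegasus was trained on; a small illustration of the truncation behavior with a deliberately tiny limit:

```python
from transformers import PegasusTokenizer

tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")
ids = tokenizer("My friends are cool but they eat too many carbs.",
                max_length=5, truncation=True)["input_ids"]
print(len(ids))  # no more than 5 token ids survive truncation
```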
23 changes: 13 additions & 10 deletions src/transformers/models/fsmt/modeling_fsmt.py
@@ -199,16 +199,19 @@
FSMT_GENERATION_EXAMPLE = r"""
Translation example::

from transformers import FSMTTokenizer, FSMTForConditionalGeneration

mname = "facebook/wmt19-ru-en" model = FSMTForConditionalGeneration.from_pretrained(mname) tokenizer =
FSMTTokenizer.from_pretrained(mname)

src_text = "Машинное обучение - это здорово, не так ли?" input_ids = tokenizer.encode(src_text,
return_tensors='pt') outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3) for i, output in
enumerate(outputs):
decoded = tokenizer.decode(output, skip_special_tokens=True) print(f"{i}: {decoded})
# 1: Machine learning is great, isn't it? ...
```python
>>> from transformers import FSMTTokenizer, FSMTForConditionalGeneration

>>> mname = "facebook/wmt19-ru-en"
>>> model = FSMTForConditionalGeneration.from_pretrained(mname)
>>> tokenizer = FSMTTokenizer.from_pretrained(mname)

>>> src_text = "Машинное обучение - это здорово, не так ли?"
>>> input_ids = tokenizer(src_text, return_tensors="pt").input_ids
>>> outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3)
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
"Machine learning is great, isn't it?"
```

"""

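Because `num_return_sequences=3` yields three hypotheses, a short follow-up sketch (assuming the `outputs` and `tokenizer` from the example above) decodes all of them rather than only the first:

```python
# Assumes `outputs` and `tokenizer` from the FSMT example above.
for i, output in enumerate(outputs):
    decoded = tokenizer.decode(output, skip_special_tokens=True)
    print(f"{i}: {decoded}")
```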
61 changes: 33 additions & 28 deletions src/transformers/models/led/modeling_led.py
@@ -1456,34 +1456,39 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
LED_GENERATION_EXAMPLE = r"""
Summarization example::

>>> import torch >>> from transformers import LEDTokenizer, LEDForConditionalGeneration

>>> model = LEDForConditionalGeneration.from_pretrained('allenai/led-large-16384-arxiv') >>> tokenizer =
LEDTokenizer.from_pretrained('allenai/led-large-16384-arxiv')

>>> ARTICLE_TO_SUMMARIZE = '''Transformers (Vaswani et al., 2017) have achieved state-of-the-art ... results in
a wide range of natural language tasks including generative ... language modeling (Dai et al., 2019; Radford et
al., 2019) and discriminative ... language understanding (Devlin et al., 2019). This success is partly due to
... the self-attention component which enables the network to capture contextual ... information from the
entire sequence. While powerful, the memory and computational ... requirements of self-attention grow
quadratically with sequence length, making ... it infeasible (or very expensive) to process long sequences. ...
... To address this limitation, we present Longformer, a modified Transformer ... architecture with a
self-attention operation that scales linearly with the ... sequence length, making it versatile for processing
long documents (Fig 1). This ... is an advantage for natural language tasks such as long document
classification, ... question answering (QA), and coreference resolution, where existing approaches ...
partition or shorten the long context into smaller sequences that fall within the ... typical 512 token limit
of BERT-style pretrained models. Such partitioning could ... potentially result in loss of important
cross-partition information, and to ... mitigate this problem, existing methods often rely on complex
architectures to ... address such interactions. On the other hand, our proposed Longformer is able to ... build
contextual representations of the entire context using multiple layers of ... attention, reducing the need for
task-specific architectures.''' >>> inputs = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors='pt')

>>> # Global attention on the first token (cf. Beltagy et al. 2020) >>> global_attention_mask =
torch.zeros_like(inputs) >>> global_attention_mask[:, 0] = 1

>>> # Generate Summary >>> summary_ids = model.generate(inputs, global_attention_mask=global_attention_mask,
... num_beams=3, max_length=32, early_stopping=True) >>> print(tokenizer.decode(summary_ids[0],
skip_special_tokens=True, clean_up_tokenization_spaces=True))
```python
>>> import torch
>>> from transformers import LEDTokenizer, LEDForConditionalGeneration

>>> model = LEDForConditionalGeneration.from_pretrained("allenai/led-large-16384-arxiv")
>>> tokenizer = LEDTokenizer.from_pretrained("allenai/led-large-16384-arxiv")

>>> ARTICLE_TO_SUMMARIZE = '''Transformers (Vaswani et al., 2017) have achieved state-of-the-art
... results in a wide range of natural language tasks including generative language modeling
... (Dai et al., 2019; Radford et al., 2019) and discriminative language understanding (Devlin et al., 2019).
... This success is partly due to the self-attention component which enables the network to capture contextual
... information from the entire sequence. While powerful, the memory and computational requirements of
... self-attention grow quadratically with sequence length, making it infeasible (or very expensive) to
... process long sequences. To address this limitation, we present Longformer, a modified Transformer
... architecture with a self-attention operation that scales linearly with the sequence length, making it
... versatile for processing long documents (Fig 1). This is an advantage for natural language tasks such as
... long document classification, question answering (QA), and coreference resolution, where existing approaches
... partition or shorten the long context into smaller sequences that fall within the typical 512 token limit
... of BERT-style pretrained models. Such partitioning could potentially result in loss of important
... cross-partition information, and to mitigate this problem, existing methods often rely on complex
... architectures to address such interactions. On the other hand, our proposed Longformer is able to build
... contextual representations of the entire context using multiple layers of attention, reducing the need for
... task-specific architectures.'''
>>> inputs = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors="pt")

>>> # Global attention on the first token (cf. Beltagy et al. 2020)
>>> global_attention_mask = torch.zeros_like(inputs)
>>> global_attention_mask[:, 0] = 1

>>> # Generate Summary
>>> summary_ids = model.generate(inputs, global_attention_mask=global_attention_mask, num_beams=3, max_length=32)
>>> print(tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True))
```
"""

LED_INPUTS_DOCSTRING = r"""
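The `global_attention_mask` has the same shape as `input_ids`, with 1 marking tokens that attend globally; a tiny self-contained illustration:

```python
import torch

inputs = torch.tensor([[0, 713, 16, 10, 1374, 2]])  # dummy token ids, shape (1, 6)
global_attention_mask = torch.zeros_like(inputs)
global_attention_mask[:, 0] = 1  # global attention on the first (<s>) token only
print(global_attention_mask)  # tensor([[1, 0, 0, 0, 0, 0]])
```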
18 changes: 10 additions & 8 deletions src/transformers/models/m2m_100/modeling_m2m_100.py
@@ -566,17 +566,19 @@ def _set_gradient_checkpointing(self, module, value=False):
M2M_100_GENERATION_EXAMPLE = r"""
Translation example::

>>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
```python
>>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration

>>> model = M2M100ForConditionalGeneration.from_pretrained('facebook/m2m100_418M') >>> tokenizer =
M2M100Tokenizer.from_pretrained('facebook/m2m100_418M')
>>> model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
>>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

>>> text_to_translate = "Life is like a box of chocolates" >>> model_inputs = tokenizer(text_to_translate,
return_tensors='pt')
>>> text_to_translate = "Life is like a box of chocolates"
>>> model_inputs = tokenizer(text_to_translate, return_tensors="pt")

>>> # translate to French >>> gen_tokens = model.generate( **model_inputs,
forced_bos_token_id=tokenizer.get_lang_id("fr")) >>> print(tokenizer.batch_decode(gen_tokens,
skip_special_tokens=True))
>>> # translate to French
>>> gen_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id("fr"))
>>> print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
```
"""

M2M_100_INPUTS_DOCSTRING = r"""
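`forced_bos_token_id` is what selects M2M100's output language; a hedged sketch (the German target is an illustrative substitution for the French one above) reuses the same input for a different language pair:

```python
from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

model_inputs = tokenizer("Life is like a box of chocolates", return_tensors="pt")

# Illustrative: force German as the target language instead of French.
gen_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id("de"))
print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
```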