diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py
index 4191f1d8e3df..381204cf2d30 100755
--- a/src/transformers/models/bart/modeling_bart.py
+++ b/src/transformers/models/bart/modeling_bart.py
@@ -534,33 +534,40 @@ def __init_subclass__(self):
 """
 BART_GENERATION_EXAMPLE = r"""
- Summarization example::
+ Summarization example:
- >>> from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
+ ```python
+ >>> from transformers import BartTokenizer, BartForConditionalGeneration
- >>> model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn') >>> tokenizer =
- BartTokenizer.from_pretrained('facebook/bart-large-cnn')
+ >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+ >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
- >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
- tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
+ >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")
- >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
- early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
- clean_up_tokenization_spaces=False) for g in summary_ids])
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
- Mask filling example::
+ Mask filling example:
- >>> from transformers import BartTokenizer, BartForConditionalGeneration >>> tokenizer =
- BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
- carbs."
+ ```python
+ >>> from transformers import BartTokenizer, BartForConditionalGeneration
- >>> model = BartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
- tokenizer([TXT], return_tensors='pt')['input_ids'] >>> logits = model(input_ids).logits
+ >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
+ >>> TXT = "My friends are <mask> but they eat too many carbs."
- >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0,
- masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5)
+ >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")
+ >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
+ >>> logits = model(input_ids).logits
- >>> tokenizer.decode(predictions).split()
+ >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
+ >>> probs = logits[0, masked_index].softmax(dim=0)
+ >>> values, predictions = probs.topk(5)
+
+ >>> tokenizer.decode(predictions).split()
+ ```
 """
 BART_INPUTS_DOCSTRING = r"""
diff --git a/src/transformers/models/bart/modeling_flax_bart.py b/src/transformers/models/bart/modeling_flax_bart.py
index f850227b3a9a..55945a72d94f 100644
--- a/src/transformers/models/bart/modeling_flax_bart.py
+++ b/src/transformers/models/bart/modeling_flax_bart.py
@@ -1506,32 +1506,40 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 FLAX_BART_CONDITIONAL_GENERATION_DOCSTRING = """
 Returns:
- Summarization example::
+ Summarization example:
- >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
+ ```python
+ >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
+
+ >>> model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+ >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
- >>> model = FlaxBartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn') >>> tokenizer =
- BartTokenizer.from_pretrained('facebook/bart-large-cnn')
+ >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="np")
- >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
- tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='jax')
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs["input_ids"]).sequences
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
- >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']).sequences >>>
- print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ Mask filling example:
- Mask filling example::
+ ```python
+ >>> import jax
+ >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration
- >>> from transformers import BartTokenizer, FlaxBartForConditionalGeneration >>> tokenizer =
- BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
- carbs."
+ >>> model = FlaxBartForConditionalGeneration.from_pretrained("facebook/bart-large")
+ >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
- >>> model = FlaxBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
- tokenizer([TXT], return_tensors='jax')['input_ids'] >>> logits = model(input_ids).logits
+ >>> TXT = "My friends are <mask> but they eat too many carbs."
+ >>> input_ids = tokenizer([TXT], return_tensors="jax")["input_ids"]
- >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item() >>> probs =
- jax.nn.softmax(logits[0, masked_index], axis=0) >>> values, predictions = jax.lax.top_k(probs)
+ >>> logits = model(input_ids).logits
+ >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item()
+ >>> probs = jax.nn.softmax(logits[0, masked_index], axis=0)
+ >>> values, predictions = jax.lax.top_k(probs, k=5)
- >>> tokenizer.decode(predictions).split()
+ >>> tokenizer.decode(predictions).split()
+ ```
 """
 overwrite_call_docstring(
diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py
index 04d32aecab03..9cc49f147296 100644
--- a/src/transformers/models/bart/modeling_tf_bart.py
+++ b/src/transformers/models/bart/modeling_tf_bart.py
@@ -510,29 +510,36 @@ def serving(self, inputs):
 BART_GENERATION_EXAMPLE = r"""
- Summarization example::
+ Summarization example:
- >>> from transformers import BartTokenizer, TFBartForConditionalGeneration, BartConfig
+ ```python
+ >>> from transformers import BartTokenizer, TFBartForConditionalGeneration
- >>> model = TFBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> tokenizer =
- BartTokenizer.from_pretrained('facebook/bart-large')
+ >>> model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")
+ >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
- >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
- tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='tf')
+ >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="tf")
- >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
- early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
- clean_up_tokenization_spaces=False) for g in summary_ids])
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
- Mask filling example::
+ Mask filling example:
- >>> from transformers import BartTokenizer, TFBartForConditionalGeneration >>> tokenizer =
- BartTokenizer.from_pretrained('facebook/bart-large') >>> TXT = "My friends are <mask> but they eat too many
- carbs."
+ ```python
+ >>> import tensorflow as tf
+ >>> from transformers import BartTokenizer, TFBartForConditionalGeneration
- >>> model = TFBartForConditionalGeneration.from_pretrained('facebook/bart-large') >>> input_ids =
- tokenizer([TXT], return_tensors='tf')['input_ids'] >>> logits = model(input_ids).logits >>> probs =
- tf.nn.softmax(logits[0]) >>> # probs[5] is associated with the mask token
+ >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
+ >>> TXT = "My friends are <mask> but they eat too many carbs."
+
+ >>> model = TFBartForConditionalGeneration.from_pretrained("facebook/bart-large")
+ >>> input_ids = tokenizer([TXT], return_tensors="tf")["input_ids"]
+ >>> logits = model(input_ids).logits
+ >>> probs = tf.nn.softmax(logits[0])
+ >>> # probs[5] is associated with the mask token
+ ```
 """
diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
index 81c28f4e4d80..c4db1b8bec5a 100755
--- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
+++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
@@ -1619,19 +1619,21 @@ def dummy_inputs(self):
 """
 BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r"""
- Summarization example::
+ Summarization example:
- >>> from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration, BigBirdPegasusConfig
+ ```python
+ >>> from transformers import PegasusTokenizer, BigBirdPegasusForConditionalGeneration
- >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained('google/bigbird-pegasus-large-arxiv') >>>
- tokenizer = PegasusTokenizer.from_pretrained('google/bigbird-pegasus-large-arxiv')
+ >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv")
+ >>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")
- >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> inputs =
- tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors='pt', truncation=True)
+ >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True)
- >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5,
- early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True,
- clean_up_tokenization_spaces=False) for g in summary_ids])
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
 """
 BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r"""
diff --git a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
index ffa1eac9d974..c2dcc4be535a 100644
--- a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
@@ -1482,7 +1482,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 FLAX_BLENDERBOT_SMALL_CONDITIONAL_GENERATION_DOCSTRING = """
 Returns:
- Summarization example::
+ Summarization example:
 >>> from transformers import BlenderbotSmallTokenizer, FlaxBlenderbotSmallForConditionalGeneration
@@ -1495,7 +1495,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']).sequences >>>
 print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
- Mask filling example::
+ Mask filling example:
 >>> from transformers import BlenderbotSmallTokenizer, FlaxBlenderbotSmallForConditionalGeneration >>> tokenizer =
 BlenderbotSmallTokenizer.from_pretrained('facebook/blenderbot_small-90M') >>> TXT = "My friends are
diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py
index d93cb71d5efd..bb7ba9bd3c6e 100644
--- a/src/transformers/models/fsmt/modeling_fsmt.py
+++ b/src/transformers/models/fsmt/modeling_fsmt.py
@@ -199,16 +199,19 @@
 FSMT_GENERATION_EXAMPLE = r"""
- Translation example::
+ Translation example:
- from transformers import FSMTTokenizer, FSMTForConditionalGeneration
-
- mname = "facebook/wmt19-ru-en" model = FSMTForConditionalGeneration.from_pretrained(mname) tokenizer =
- FSMTTokenizer.from_pretrained(mname)
-
- src_text = "Машинное обучение - это здорово, не так ли?" input_ids = tokenizer.encode(src_text,
- return_tensors='pt') outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3) for i, output in
- enumerate(outputs):
- decoded = tokenizer.decode(output, skip_special_tokens=True) print(f"{i}: {decoded})
- # 1: Machine learning is great, isn't it? ...
+ ```python
+ >>> from transformers import FSMTTokenizer, FSMTForConditionalGeneration
+
+ >>> mname = "facebook/wmt19-ru-en"
+ >>> model = FSMTForConditionalGeneration.from_pretrained(mname)
+ >>> tokenizer = FSMTTokenizer.from_pretrained(mname)
+
+ >>> src_text = "Машинное обучение - это здорово, не так ли?"
+ >>> input_ids = tokenizer(src_text, return_tensors="pt").input_ids
+ >>> outputs = model.generate(input_ids, num_beams=5, num_return_sequences=3)
+ >>> tokenizer.decode(outputs[0], skip_special_tokens=True)
+ "Machine learning is great, isn't it?"
+ ```
 """
diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py
index 0a07044afba9..8054b9ee6d33 100755
--- a/src/transformers/models/led/modeling_led.py
+++ b/src/transformers/models/led/modeling_led.py
@@ -1454,36 +1454,41 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
 """
 LED_GENERATION_EXAMPLE = r"""
- Summarization example::
-
- >>> import torch >>> from transformers import LEDTokenizer, LEDForConditionalGeneration
-
- >>> model = LEDForConditionalGeneration.from_pretrained('allenai/led-large-16384-arxiv') >>> tokenizer =
- LEDTokenizer.from_pretrained('allenai/led-large-16384-arxiv')
-
- >>> ARTICLE_TO_SUMMARIZE = '''Transformers (Vaswani et al., 2017) have achieved state-of-the-art ... results in
- a wide range of natural language tasks including generative ... language modeling (Dai et al., 2019; Radford et
- al., 2019) and discriminative ... language understanding (Devlin et al., 2019). This success is partly due to
- ... the self-attention component which enables the network to capture contextual ... information from the
- entire sequence. While powerful, the memory and computational ... requirements of self-attention grow
- quadratically with sequence length, making ... it infeasible (or very expensive) to process long sequences. ...
- ... To address this limitation, we present Longformer, a modified Transformer ... architecture with a
- self-attention operation that scales linearly with the ... sequence length, making it versatile for processing
- long documents (Fig 1). This ... is an advantage for natural language tasks such as long document
- classification, ... question answering (QA), and coreference resolution, where existing approaches ...
- partition or shorten the long context into smaller sequences that fall within the ... typical 512 token limit
- of BERT-style pretrained models. Such partitioning could ... potentially result in loss of important
- cross-partition information, and to ... mitigate this problem, existing methods often rely on complex
- architectures to ... address such interactions. On the other hand, our proposed Longformer is able to ... build
- contextual representations of the entire context using multiple layers of ... attention, reducing the need for
- task-specific architectures.''' >>> inputs = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors='pt')
-
- >>> # Global attention on the first token (cf. Beltagy et al. 2020) >>> global_attention_mask =
- torch.zeros_like(inputs) >>> global_attention_mask[:, 0] = 1
-
- >>> # Generate Summary >>> summary_ids = model.generate(inputs, global_attention_mask=global_attention_mask,
- ... num_beams=3, max_length=32, early_stopping=True) >>> print(tokenizer.decode(summary_ids[0],
- skip_special_tokens=True, clean_up_tokenization_spaces=True))
+ Summarization example:
+
+ ```python
+ >>> import torch
+ >>> from transformers import LEDTokenizer, LEDForConditionalGeneration
+
+ >>> model = LEDForConditionalGeneration.from_pretrained("allenai/led-large-16384-arxiv")
+ >>> tokenizer = LEDTokenizer.from_pretrained("allenai/led-large-16384-arxiv")
+
+ >>> ARTICLE_TO_SUMMARIZE = '''Transformers (Vaswani et al., 2017) have achieved state-of-the-art
+ ... results in a wide range of natural language tasks including generative language modeling
+ ... (Dai et al., 2019; Radford et al., 2019) and discriminative language understanding (Devlin et al., 2019).
+ ... This success is partly due to the self-attention component which enables the network to capture contextual
+ ... information from the entire sequence. While powerful, the memory and computational requirements of
+ ... self-attention grow quadratically with sequence length, making it infeasible (or very expensive) to
+ ... process long sequences. To address this limitation, we present Longformer, a modified Transformer
+ ... architecture with a self-attention operation that scales linearly with the sequence length, making it
+ ... versatile for processing long documents (Fig 1). This is an advantage for natural language tasks such as
+ ... long document classification, question answering (QA), and coreference resolution, where existing approaches
+ ... partition or shorten the long context into smaller sequences that fall within the typical 512 token limit
+ ... of BERT-style pretrained models. Such partitioning could potentially result in loss of important
+ ... cross-partition information, and to mitigate this problem, existing methods often rely on complex
+ ... architectures to address such interactions. On the other hand, our proposed Longformer is able to build
+ ... contextual representations of the entire context using multiple layers of attention, reducing the need for
+ ... task-specific architectures.'''
+ >>> inputs = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors="pt")
+
+ >>> # Global attention on the first token (cf. Beltagy et al. 2020)
+ >>> global_attention_mask = torch.zeros_like(inputs)
+ >>> global_attention_mask[:, 0] = 1
+
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs, global_attention_mask=global_attention_mask, num_beams=3, max_length=32)
+ >>> print(tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True))
+ ```
 """
 LED_INPUTS_DOCSTRING = r"""
diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py
index c775ee254031..32a9aaf91aca 100755
--- a/src/transformers/models/m2m_100/modeling_m2m_100.py
+++ b/src/transformers/models/m2m_100/modeling_m2m_100.py
@@ -566,17 +566,19 @@ def _set_gradient_checkpointing(self, module, value=False):
 M2M_100_GENERATION_EXAMPLE = r"""
- Translation example::
+ Translation example:
- >>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
+ ```python
+ >>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
- >>> model = M2M100ForConditionalGeneration.from_pretrained('facebook/m2m100_418M') >>> tokenizer =
- M2M100Tokenizer.from_pretrained('facebook/m2m100_418M')
+ >>> model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
+ >>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
- >>> text_to_translate = "Life is like a box of chocolates" >>> model_inputs = tokenizer(text_to_translate,
- return_tensors='pt')
+ >>> text_to_translate = "Life is like a box of chocolates"
+ >>> model_inputs = tokenizer(text_to_translate, return_tensors="pt")
- >>> # translate to French >>> gen_tokens = model.generate( **model_inputs,
- forced_bos_token_id=tokenizer.get_lang_id("fr")) >>> print(tokenizer.batch_decode(gen_tokens,
- skip_special_tokens=True))
+ >>> # translate to French
+ >>> gen_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id("fr"))
+ >>> print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
+ ```
 """
 M2M_100_INPUTS_DOCSTRING = r"""
diff --git a/src/transformers/models/mbart/modeling_flax_mbart.py b/src/transformers/models/mbart/modeling_flax_mbart.py
index e909f7700ad0..e721d342993b 100644
--- a/src/transformers/models/mbart/modeling_flax_mbart.py
+++ b/src/transformers/models/mbart/modeling_flax_mbart.py
@@ -1530,34 +1530,41 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 FLAX_MBART_CONDITIONAL_GENERATION_DOCSTRING = r"""
 Returns:
- Summarization example::
+ Summarization example:
- >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration, MBartConfig
+ ```python
+ >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration
- >>> model = FlaxMBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> tokenizer =
- MBartTokenizer.from_pretrained('facebook/mbart-large-cc25')
+ >>> model = FlaxMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
+ >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
- >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." >>> inputs =
- tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='np')
+ >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="np") - >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, - early_stopping=True).sequences >>> print([tokenizer.decode(g, skip_special_tokens=True, - clean_up_tokenization_spaces=False) for g in summary_ids]) + >>> # Generate Summary + >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5).sequences + >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + ``` - Mask filling example:: + Mask filling example: - >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration >>> tokenizer = - MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') >>> # de_DE is the language symbol id for - German >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" + ```python + >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration - >>> model = FlaxMBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> input_ids = - tokenizer([TXT], add_special_tokens=False, return_tensors='np')['input_ids'] >>> logits = - model(input_ids).logits + >>> model = FlaxMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") + >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") - >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item() >>> probs = logits[0, - masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5) + >>> # de_DE is the language symbol id for German + >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" + >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="np")["input_ids"] - >>> tokenizer.decode(predictions).split() + >>> logits = model(input_ids).logits + >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item() + >>> probs = logits[0, masked_index].softmax(dim=0) + >>> values, predictions = probs.topk(5) + + >>> tokenizer.decode(predictions).split() + ``` """ overwrite_call_docstring( diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 06f7e514e22e..e877da032d09 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -532,34 +532,42 @@ def dummy_inputs(self): """ MBART_GENERATION_EXAMPLE = r""" - Summarization example:: + Summarization example: - >>> from transformers import MBartTokenizer, MBartForConditionalGeneration, MBartConfig + ```python + >>> from transformers import MBartTokenizer, MBartForConditionalGeneration - >>> model = MBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> tokenizer = - MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') + >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") + >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") - >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." >>> inputs = - tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt') + >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." 
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt") - >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, - early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True, - clean_up_tokenization_spaces=False) for g in summary_ids]) + >>> # Generate Summary + >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) + >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + ``` - Mask filling example:: + Mask filling example: - >>> from transformers import MBartTokenizer, MBartForConditionalGeneration >>> tokenizer = - MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') >>> # de_DE is the language symbol id for - German >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" + ```python + >>> from transformers import MBartTokenizer, MBartForConditionalGeneration - >>> model = MBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> input_ids = - tokenizer([TXT], add_special_tokens=False, return_tensors='pt')['input_ids'] >>> logits = - model(input_ids).logits + >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") + >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") - >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0, - masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5) + >>> # de_DE is the language symbol id for German + >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" - >>> tokenizer.decode(predictions).split() + >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt")["input_ids"] + >>> logits = model(input_ids).logits + + >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() + >>> probs = logits[0, masked_index].softmax(dim=0) + >>> values, predictions = probs.topk(5) + + >>> tokenizer.decode(predictions).split() + ``` """ MBART_INPUTS_DOCSTRING = r""" diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 972a441837d3..a254d9e287ed 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -591,29 +591,38 @@ def serving(self, inputs): """ MBART_GENERATION_EXAMPLE = r""" - Summarization example:: + Summarization example: - >>> from transformers import MBartTokenizer, TFMBartForConditionalGeneration, MBartConfig + ```python + >>> from transformers import MBartTokenizer, TFMBartForConditionalGeneration, MBartConfig - >>> model = MBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> tokenizer = - MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') + >>> model = TFMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") + >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") - >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." >>> inputs = - tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='tf') + >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." 
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="tf") - >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, - early_stopping=True) >>> print([tokenizer.decode(g, skip_special_tokens=True, - clean_up_tokenization_spaces=False) for g in summary_ids]) + >>> # Generate Summary + >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) + >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + ``` - Mask filling example:: + Mask filling example: - >>> from transformers import MBartTokenizer, TFMBartForConditionalGeneration >>> tokenizer = - MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') >>> # de_DE is the language symbol id for - German >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" + ```python + >>> from transformers import MBartTokenizer, TFMBartForConditionalGeneration - >>> model = MBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> input_ids = - tokenizer([TXT], add_special_tokens=False, return_tensors='tf')['input_ids'] >>> logits = - model(input_ids).logits >>> probs = tf.nn.softmax(logits[0]) >>> # probs[5] is associated with the mask token + >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") + >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") + + >>> # de_DE is the language symbol id for German + >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" + >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="tf")["input_ids"] + + >>> logits = model(input_ids).logits + >>> probs = tf.nn.softmax(logits[0]) + >>> # probs[5] is associated with the mask token + ``` """ diff --git a/src/transformers/models/pegasus/modeling_flax_pegasus.py b/src/transformers/models/pegasus/modeling_flax_pegasus.py index 7599909f6a0f..b3ed771e3ac5 100644 --- a/src/transformers/models/pegasus/modeling_flax_pegasus.py +++ b/src/transformers/models/pegasus/modeling_flax_pegasus.py @@ -1480,7 +1480,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs): FLAX_PEGASUS_CONDITIONAL_GENERATION_DOCSTRING = """ Returns: - Summarization example:: + Summarization example: >>> from transformers import PegasusTokenizer, FlaxPegasusForConditionalGeneration @@ -1493,7 +1493,7 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs): >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']).sequences >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) - Mask filling example:: + Mask filling example: >>> from transformers import PegasusTokenizer, FlaxPegasusForConditionalGeneration >>> tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-large') >>> TXT = "My friends are but they eat too many diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index 14b300731d9c..1b31fa0eea8b 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -512,20 +512,25 @@ def _set_gradient_checkpointing(self, module, value=False): """ PEGASUS_GENERATION_EXAMPLE = r""" - Summarization example:: + Summarization example: - >>> from transformers import PegasusTokenizer, PegasusForConditionalGeneration + ```python + >>> from transformers import PegasusTokenizer, 
- >>> model = PegasusForConditionalGeneration.from_pretrained('google/pegasus-xsum') >>> tokenizer =
- PegasusTokenizer.from_pretrained('google/pegasus-xsum')
+ >>> model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
+ >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
- >>> ARTICLE_TO_SUMMARIZE = ( ... "PG&E stated it scheduled the blackouts in response to forecasts for high
- winds " ... "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers
- were " ... "scheduled to be affected by the shutoffs which were expected to last through at least midday
- tomorrow." ... ) >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
+ >>> ARTICLE_TO_SUMMARIZE = (
+ ... "PG&E stated it scheduled the blackouts in response to forecasts for high winds "
+ ... "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
+ ... "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
+ ... )
+ >>> inputs = tokenizer(ARTICLE_TO_SUMMARIZE, max_length=1024, return_tensors="pt")
- >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']) >>> print([tokenizer.decode(g,
- skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs["input_ids"])
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
 """
 PEGASUS_INPUTS_DOCSTRING = r"""
diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py
index 0da2a9aa926e..3ed751f67bb7 100644
--- a/src/transformers/models/pegasus/modeling_tf_pegasus.py
+++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py
@@ -555,20 +555,25 @@ def serving(self, inputs):
 """
 PEGASUS_GENERATION_EXAMPLE = r"""
- Summarization example::
+ Summarization example:
- >>> from transformers import PegasusTokenizer, TFPegasusForConditionalGeneration
+ ```python
+ >>> from transformers import PegasusTokenizer, TFPegasusForConditionalGeneration
- >>> model = TFPegasusForConditionalGeneration.from_pretrained('google/pegasus-xsum') >>> tokenizer =
- PegasusTokenizer.from_pretrained('google/pegasus-xsum')
+ >>> model = TFPegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
+ >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
- >>> ARTICLE_TO_SUMMARIZE = ( ... "PG&E stated it scheduled the blackouts in response to forecasts for high
- winds " ... "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers
- were " ... "scheduled to be affected by the shutoffs which were expected to last through at least midday
- tomorrow." ... ) >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='tf')
+ >>> ARTICLE_TO_SUMMARIZE = (
+ ... "PG&E stated it scheduled the blackouts in response to forecasts for high winds "
+ ... "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
+ ... "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
+ ... )
+ >>> inputs = tokenizer(ARTICLE_TO_SUMMARIZE, max_length=1024, return_tensors="tf")
- >>> # Generate Summary >>> summary_ids = model.generate(inputs['input_ids']) >>> print([tokenizer.decode(g,
- skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs["input_ids"])
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
 """
 PEGASUS_INPUTS_DOCSTRING = r"""
diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py
index 17939644d697..a8a537c8fd44 100644
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py
@@ -2605,35 +2605,40 @@ def update_inputs_for_generation(self, model_outputs, model_kwargs):
 FLAX_{{cookiecutter.uppercase_modelname}}_CONDITIONAL_GENERATION_DOCSTRING = """
 Returns:
- Summarization example::
+ Summarization example:
- >>> from transformers import {{cookiecutter.camelcase_modelname}}Tokenizer, Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
+ ```python
+ >>> from transformers import {{cookiecutter.camelcase_modelname}}Tokenizer, Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
- >>> model = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
- >>> tokenizer = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
+ >>> model = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
+ >>> tokenizer = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
- >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
- >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='np')
+ >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='np')
- >>> # Generate Summary
- >>> summary_ids = model.generate(inputs['input_ids']).sequences
- >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs['input_ids']).sequences
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
- Mask filling example::
- >>> from transformers import {{cookiecutter.camelcase_modelname}}Tokenizer, Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
- >>> tokenizer = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
- >>> TXT = "My friends are <mask> but they eat too many carbs."
-
- >>> model = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
- >>> input_ids = tokenizer([TXT], return_tensors='np')['input_ids']
- >>> logits = model(input_ids).logits
+ ```python
+ >>> import jax
+ >>> from transformers import {{cookiecutter.camelcase_modelname}}Tokenizer, Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
+
+ >>> model = Flax{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
+ >>> tokenizer = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
+
+ >>> TXT = "My friends are <mask> but they eat too many carbs."
+ >>> input_ids = tokenizer([TXT], return_tensors='np')['input_ids']
- >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
- >>> probs = jax.nn.softmax(logits[0, masked_index], axis=0)
- >>> values, predictions = jax.lax.top_k(probs)
+ >>> logits = model(input_ids).logits
+ >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero()[0].item()
+ >>> probs = jax.nn.softmax(logits[0, masked_index], axis=0)
+ >>> values, predictions = jax.lax.top_k(probs, k=5)
- >>> tokenizer.decode(predictions).split()
+ >>> tokenizer.decode(predictions).split()
+ ```
 """
 overwrite_call_docstring(
diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
index e0860946fefb..d40305c6f0f3 100755
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
@@ -2067,19 +2067,21 @@ def _set_gradient_checkpointing(self, module, value=False):
 """
 {{cookiecutter.uppercase_modelname}}_GENERATION_EXAMPLE = r"""
- Summarization example::
+ Summarization example:
- >>> from transformers import {{cookiecutter.camelcase_modelname}}Tokenizer, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}Config
+ ```python
+ >>> from transformers import {{cookiecutter.camelcase_modelname}}Tokenizer, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration
- >>> model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
- >>> tokenizer = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
+ >>> model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
+ >>> tokenizer = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
- >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
- >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
+ >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
+ >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors='pt')
- >>> # Generate Summary
- >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, early_stopping=True)
- >>> print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])
+ >>> # Generate Summary
+ >>> summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5)
+ >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ ```
 """
 {{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING = r"""