diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 20a2e9d239e2..c35cf318c437 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1770,23 +1770,31 @@ def forward( Returns: - Examples: + Mask filling example: ```python - >>> import torch - >>> from transformers import LongformerForMaskedLM, LongformerTokenizer + >>> from transformers import LongformerTokenizer, LongformerForMaskedLM - >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") + >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") + ``` - >>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000) # long input document - >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1 + Let's try a very long input. - >>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM - >>> # check `LongformerModel.forward` for more details how to set *attention_mask* - >>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids) - >>> loss = outputs.loss - >>> prediction_logits = outputs.logits + ```python + >>> TXT = ( + ... "My friends are but they eat too many carbs." + ... + " That's why I decide not to eat with them." * 300 + ... ) + >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] + >>> logits = model(input_ids).logits + + >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() + >>> probs = logits[0, masked_index].softmax(dim=0) + >>> values, predictions = probs.topk(5) + + >>> tokenizer.decode(predictions).split() + ['healthy', 'skinny', 'thin', 'good', 'vegetarian'] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1848,9 +1856,11 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="jpelhaw/longformer-base-plagiarism-detection", output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'ORIGINAL'", + expected_loss=5.44, ) def forward( self, @@ -2114,9 +2124,11 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="brad1141/Longformer-finetuned-norm", output_type=LongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="['Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence']", + expected_loss=0.63, ) def forward( self, diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 124fe2c06fec..8f654ea84b18 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -2079,10 +2079,12 @@ def get_prefix_bias_name(self): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="allenai/longformer-base-4096", output_type=TFLongformerMaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", + expected_output="' Paris'", + expected_loss=0.44, ) def call( self, @@ -2175,6 +2177,8 @@ def __init__(self, config, *inputs, **kwargs): checkpoint="allenai/longformer-large-4096-finetuned-triviaqa", output_type=TFLongformerQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output="' puppet'", + expected_loss=0.96, ) def call( self, @@ -2318,9 +2322,11 @@ def __init__(self, config, *inputs, **kwargs): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="hf-internal-testing/tiny-random-longformer", output_type=TFLongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'LABEL_1'", + expected_loss=0.69, ) def call( self, @@ -2556,9 +2562,11 @@ def __init__(self, config, *inputs, **kwargs): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="hf-internal-testing/tiny-random-longformer", output_type=TFLongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1']", + expected_loss=0.59, ) def call( self, diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 8d8c7eccf2cd..b22499e9d0bd 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -31,6 +31,8 @@ src/transformers/models/glpn/modeling_glpn.py src/transformers/models/gpt2/modeling_gpt2.py src/transformers/models/gptj/modeling_gptj.py src/transformers/models/hubert/modeling_hubert.py +src/transformers/models/longformer/modeling_longformer.py +src/transformers/models/longformer/modeling_tf_longformer.py src/transformers/models/marian/modeling_marian.py src/transformers/models/mbart/modeling_mbart.py src/transformers/models/mobilebert/modeling_mobilebert.py