Skip to content
40 changes: 25 additions & 15 deletions src/transformers/models/longformer/modeling_longformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,23 +1770,29 @@ def forward(

Returns:

Examples:
Mask filling example:

```python
>>> import torch
>>> from transformers import LongformerForMaskedLM, LongformerTokenizer
>>> from transformers import LongformerTokenizer, LongformerForMaskedLM

>>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")
>>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")

>>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000) # long input document
>>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1

>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
>>> # check `LongformerModel.forward` for more details how to set *attention_mask*
>>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
>>> loss = outputs.loss
>>> prediction_logits = outputs.logits
>>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")

Let's try a very long input.

>>> TXT = (
... "My friends are <mask> but they eat too many carbs."
... + " That's why I decide not to eat with them." * 300
... )
>>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
>>> logits = model(input_ids).logits

>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
>>> probs = logits[0, masked_index].softmax(dim=0)
>>> values, predictions = probs.topk(5)

>>> tokenizer.decode(predictions).split()
['healthy', 'skinny', 'thin', 'good', 'vegetarian']
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

Expand Down Expand Up @@ -1848,9 +1854,11 @@ def __init__(self, config):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint="jpelhaw/longformer-base-plagiarism-detection",
output_type=LongformerSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'ORIGINAL'",
expected_loss=5.44,
)
def forward(
self,
Expand Down Expand Up @@ -2114,9 +2122,11 @@ def __init__(self, config):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint="brad1141/Longformer-finetuned-norm",
output_type=LongformerTokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="['Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence']",
expected_loss=0.63,
)
def forward(
self,
Expand Down
14 changes: 11 additions & 3 deletions src/transformers/models/longformer/modeling_tf_longformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2079,10 +2079,12 @@ def get_prefix_bias_name(self):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint="allenai/longformer-base-4096",
output_type=TFLongformerMaskedLMOutput,
config_class=_CONFIG_FOR_DOC,
mask="<mask>",
expected_output="' Paris'",
expected_loss=0.44,
)
def call(
self,
Expand Down Expand Up @@ -2175,6 +2177,8 @@ def __init__(self, config, *inputs, **kwargs):
checkpoint="allenai/longformer-large-4096-finetuned-triviaqa",
output_type=TFLongformerQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="' puppet'",
expected_loss=0.96,
)
def call(
self,
Expand Down Expand Up @@ -2318,9 +2322,11 @@ def __init__(self, config, *inputs, **kwargs):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint="hf-internal-testing/tiny-random-longformer",
output_type=TFLongformerSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'LABEL_1'",
expected_loss=0.69,
)
def call(
self,
Expand Down Expand Up @@ -2556,9 +2562,11 @@ def __init__(self, config, *inputs, **kwargs):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint="hf-internal-testing/tiny-random-longformer",
output_type=TFLongformerTokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1']",
expected_loss=0.59,
)
def call(
self,
Expand Down
2 changes: 2 additions & 0 deletions utils/documentation_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ src/transformers/models/glpn/modeling_glpn.py
src/transformers/models/gpt2/modeling_gpt2.py
src/transformers/models/gptj/modeling_gptj.py
src/transformers/models/hubert/modeling_hubert.py
src/transformers/models/longformer/modeling_longformer.py
src/transformers/models/longformer/modeling_tf_longformer.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/mobilebert/modeling_mobilebert.py
Expand Down