diff --git a/src/transformers/modeling_tf_outputs.py b/src/transformers/modeling_tf_outputs.py index ed0133c98465..4c98106e3045 100644 --- a/src/transformers/modeling_tf_outputs.py +++ b/src/transformers/modeling_tf_outputs.py @@ -175,7 +175,7 @@ class TFCausalLMOutput(ModelOutput): Base class for causal language model (or autoregressive) outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Language modeling loss (for next-token prediction). logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -204,7 +204,7 @@ class TFCausalLMOutputWithPast(ModelOutput): Base class for causal language model (or autoregressive) outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Language modeling loss (for next-token prediction). logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -240,7 +240,7 @@ class TFMaskedLMOutput(ModelOutput): Base class for masked language models outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Masked language modeling (MLM) loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -269,7 +269,7 @@ class TFSeq2SeqLMOutput(ModelOutput): Base class for sequence-to-sequence language models outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Language modeling loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -321,7 +321,7 @@ class TFNextSentencePredictorOutput(ModelOutput): Base class for outputs of models predicting if two sentences are consecutive or not. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`next_sentence_label` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`next_sentence_label` is provided): Next sentence prediction loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, 2)`): Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation @@ -351,7 +351,7 @@ class TFSequenceClassifierOutput(ModelOutput): Base class for outputs of sentence classification models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(batch_size, )`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). @@ -432,7 +432,7 @@ class TFMultipleChoiceModelOutput(ModelOutput): Base class for outputs of multiple choice models. Args: - loss (:obj:`tf.Tensor` of shape `(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape `(batch_size, )`, `optional`, returned when :obj:`labels` is provided): Classification loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices)`): `num_choices` is the second dimension of the input tensors. (see `input_ids` above). @@ -463,7 +463,7 @@ class TFTokenClassifierOutput(ModelOutput): Base class for outputs of token classification models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided) : + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of unmasked labels, returned when ``labels`` is provided) : Classification loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`): Classification scores (before SoftMax). @@ -492,7 +492,7 @@ class TFQuestionAnsweringModelOutput(ModelOutput): Base class for outputs of question answering models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(batch_size, )`, `optional`, returned when :obj:`start_positions` and :obj:`end_positions` are provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. start_logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`): Span-start scores (before SoftMax). @@ -579,7 +579,7 @@ class TFSequenceClassifierOutputWithPast(ModelOutput): Base class for outputs of sentence classification models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(batch_size, )`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index b5c7d80c7fde..cb935a35f0e1 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -1322,7 +1322,7 @@ def call( **kwargs, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index beed93b227e1..ab7fb68bde5e 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -1297,7 +1297,7 @@ def call( **kwargs, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index dbc4c8001672..e9b31d23a46c 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -1314,7 +1314,7 @@ def call( **kwargs, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 57908e223c9a..73f9e4e8d3e9 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -1328,7 +1328,7 @@ def call( **kwargs, ): """ - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.