Add XLMRoBERTaModel in paddlenlp#9720
Conversation
| Examples: | ||
|
|
||
| ```python | ||
| >>> from ppdiffusers.transformers import XLMRobertaConfig, XLMRobertaModel |
| classifier_dropout=None, | ||
| **kwargs, | ||
| ): | ||
| kwargs["return_dict"] = kwargs.pop("return_dict", True) |
There was a problem hiding this comment.
这里我当时是跟transformers逻辑一样,默认值return_dict为True,而paddlenlp基本上所有模型都是False,需要决策一下
| if self.gradient_checkpointing and not hidden_states.stop_gradient: | ||
| layer_outputs = self._gradient_checkpointing_func( |
There was a problem hiding this comment.
gradient_checkpointing -> recompute,参照paddlenlp的改一下吧
| all_self_attentions = () if output_attentions else None | ||
| all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None | ||
|
|
||
| if self.gradient_checkpointing and self.training: |
| super().__init__() | ||
| self.config = config | ||
| self.layer = nn.LayerList([XLMRobertaLayer(config) for _ in range(config.num_hidden_layers)]) | ||
| self.gradient_checkpointing = False |
There was a problem hiding this comment.
改成self.enable_recompute=False
| _deprecated_dict = { | ||
| "key": ".self_attn.q_proj.", | ||
| "name_mapping": { | ||
| # common | ||
| "encoder.layers.": "encoder.layer.", | ||
| # embeddings | ||
| "embeddings.layer_norm.": "embeddings.LayerNorm.", | ||
| # transformer | ||
| ".self_attn.q_proj.": ".attention.self.query.", | ||
| ".self_attn.k_proj.": ".attention.self.key.", | ||
| ".self_attn.v_proj.": ".attention.self.value.", | ||
| ".self_attn.out_proj.": ".attention.output.dense.", | ||
| ".norm1.": ".attention.output.LayerNorm.", | ||
| ".linear1.": ".intermediate.dense.", | ||
| ".linear2.": ".output.dense.", | ||
| ".norm2.": ".output.LayerNorm.", | ||
| }, | ||
| } |
|
|
||
| from paddlenlp.transformers.tokenizer_utils import AddedToken | ||
| from paddlenlp.transformers.tokenizer_utils import ( | ||
| PretrainedTokenizer as PPNLPPretrainedTokenizer, |
| __all__ = ["XLMRobertaTokenizer"] | ||
|
|
||
|
|
||
| class XLMRobertaTokenizer(PPNLPPretrainedTokenizer): |
| class ModuleUtilsMixin: | ||
| """ | ||
| A few utilities for `nn.Layer`, to be used as a mixin. | ||
| """ | ||
|
|
||
| # @property | ||
| # def device(self): | ||
| # """ | ||
| # `paddle.place`: The device on which the module is (assuming that all the module parameters are on the same | ||
| # device). | ||
| # """ | ||
| # try: | ||
| # return next(self.named_parameters())[1].place | ||
| # except StopIteration: | ||
| # try: | ||
| # return next(self.named_buffers())[1].place | ||
| # except StopIteration: | ||
| # return paddle.get_device() |
| @@ -0,0 +1,133 @@ | |||
| # coding=utf-8 | |||
| # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. | |||
There was a problem hiding this comment.
这里少一个paddle的copyright
| classifier_dropout=None, | ||
| **kwargs, | ||
| ): | ||
| kwargs["return_dict"] = kwargs.pop("return_dict", True) |
| @@ -0,0 +1,1517 @@ | |||
| # coding=utf-8 | |||
| from paddle import nn | ||
| from paddle.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss | ||
|
|
||
| from paddlenlp.transformers.activations import ACT2FN |
There was a problem hiding this comment.
from paddlenlp 这些都改成相对路径吧
| super().__init__() | ||
| self.config = config | ||
| self.layer = nn.LayerList([XLMRobertaLayer(config) for _ in range(config.num_hidden_layers)]) | ||
| self.gradient_checkpointing = False |
There was a problem hiding this comment.
改成self.enable_recompute=False
| Example: | ||
|
|
||
| ```python | ||
| >>> from ppdiffusers.transformers import AutoTokenizer, XLMRobertaForCausalLM, AutoConfig |
|
|
||
| from paddlenlp.transformers.tokenizer_utils import AddedToken | ||
| from paddlenlp.transformers.tokenizer_utils import ( | ||
| PretrainedTokenizer as PPNLPPretrainedTokenizer, |
|
在PaddleNLP/paddlenlp/transformers/auto文件里增加对应的模型、tokenizer映射 |
Codecov Report — Attention: Patch coverage is
Additional details and impacted files:
@@ Coverage Diff @@
## develop #9720 +/- ##
===========================================
- Coverage 53.20% 52.39% -0.81%
===========================================
Files 719 727 +8
Lines 115583 115095 -488
===========================================
- Hits 61493 60304 -1189
- Misses 54090 54791 +701
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
|
加两个单测,测试一下,模型初始化,tokenizer 加载。 |
|
新增对应的单测脚本 |
| # See all XLM-RoBERTa models at https://huggingface.co/models?filter=xlm-roberta | ||
| ] | ||
|
|
||
|
|
There was a problem hiding this comment.
缺少 __all__ = [""] 说明一下可以import哪些模型名称

PR types
New features
PR changes
Models
Description
在PaddleNLP中增加对于XLM-RoBERTa系列模型的支持,已支持相关预训练模型如下: