vera-pissa method added #8722
Conversation
|
Thanks for your contribution! |
Codecov ReportAttention: Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## develop #8722 +/- ##
===========================================
- Coverage 55.73% 55.51% -0.22%
===========================================
Files 623 630 +7
Lines 97464 98374 +910
===========================================
+ Hits 54324 54616 +292
- Misses 43140 43758 +618 ☔ View full report in Codecov by Sentry. |
| @@ -0,0 +1,187 @@ | |||
| out_features = 16 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. | |||
| isinstance(self.model, LoRAModel) | ||
| or isinstance(self.model, PrefixModelForCausalLM) | ||
| or isinstance(self.model, VeRAModel) | ||
| ): |
There was a problem hiding this comment.
- 测试一下VeRAModel 重新加载和热启的时候能否正常使用
- 重新加载就是训练的时候设置 load_best_model_at_end 为 True,看是否能够正常加载最好的checkpoint
- 热启指的是训练过程中,output_dir中包含原有训练checkpoint,trainer可以启用resume_from_checkpoint去加载到最后一个checkpoint继续训练
There was a problem hiding this comment.
测试可以重新加载 done
There was a problem hiding this comment.
适配了热启动,测试可以 done
| self.model = self.get_vera_model(model, vera_config) | ||
| self.is_pipelinemodel = False | ||
| if issubclass(type(self.model), PipelineLayer): | ||
| self.is_pipelinemodel = True |
There was a problem hiding this comment.
目前vera也不支持pp,建议raise NotImplementedError("VeRA doesn't support pipeline parallel now")
| vera_model = cls(model, vera_config) | ||
|
|
||
| # define vera weight name | ||
| if vera_config_tensor_parallel_degree > 1: |
There was a problem hiding this comment.
目前vera还不支持tensor parallel,可以先删除tensor_parallel_degree相关的分支
| trainable_state_dict = OrderedDict() | ||
| for name, weight in self.model.state_dict().items(): | ||
| # get vera parameter & QAT scale parameter | ||
| if not weight.stop_gradient or "activation_quanter" in name or "weight_quanter" in name: |
| # freezeB=False, vera_b, vera_d 可训练 | ||
| if "vera" in name: | ||
| weight.stop_gradient = False | ||
| elif "lora_B" in name and notfreezeB: |
There was a problem hiding this comment.
之前vera_model中参数名是lora_,现已经全部统一成vera_
done
|
|
||
| def train(self): | ||
| super().train() | ||
| if self.merge_weights and self.merged: |
There was a problem hiding this comment.
merge_weight已经删除,新增为一个merge函数,不再与train和eval耦合,可以参考这个 PR:https://github.com/PaddlePaddle/PaddleNLP/pull/8674/files
|
|
||
| else: | ||
| # Actual trainable parameters | ||
| self.lora_A = self.create_parameter( |
| @@ -0,0 +1,104 @@ | |||
| # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. | |||
There was a problem hiding this comment.
验证过,用merge后的模型可以正确预测。 done
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
There was a problem hiding this comment.
| "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' " | ||
| }, | ||
| ) | ||
| vera_alpha: int = field(default=8, metadata={"help": "Lora alpha"}) |
| r: int = 0, | ||
| vera_alpha: int = 1, | ||
| vera_dropout: float = 0.0, | ||
| merge_weights: bool = True, |
| if enable_vera is None: | ||
| if isinstance(module, nn.Linear): | ||
| vera_module = VeRALinear( | ||
| # 将要替换的层传递过去 |
| isinstance(vera_config.enable_vera_list, List) | ||
| and all(isinstance(item, bool) for item in vera_config.enable_vera_list) | ||
| ): | ||
| enable_vera_list = [vera_config.enable_vera_list] |
There was a problem hiding this comment.
enable_vera_list 这个应该是直接复用lora的,vera并没有对应的功能,建议把enable_vera_list相关全部删除,走代码里为None的分支就好
There was a problem hiding this comment.
应该是在vera_config层面就把enable_vera_list全部删除,因为我们不需要这个参数,我看现在代码还保留着?
| self.run_predictor({"inference_model": False}) | ||
|
|
||
|
|
||
| # @parameterized_class( |
| ["baichuan"], | ||
| ], | ||
| ) | ||
| class VeraTest(LLMTest, unittest.TestCase): |
There was a problem hiding this comment.
cd PaddleNLP
python -m pytest tests/llm/test_vera.py
There was a problem hiding this comment.
可以正常运行 done
| ) and args.device == "cpu": | ||
| raise ValueError("We can not apply bfloat16 or nf4/fp4 vera merge on cpu.") | ||
|
|
||
| vera_config.merge_weights = False |
There was a problem hiding this comment.
vera_config.merge_weights没有merge weight了,记得去掉,否则会报错
| self.merged = False | ||
|
|
||
| if pissa_init: | ||
| assert self.vera_alpha == self.r, "pissa method requires vera_alpha=r, scaling=1" |
There was a problem hiding this comment.
为了增加代码的覆盖率,重新加回去了并添加相应的异常测试
| isinstance(vera_config.enable_vera_list, List) | ||
| and all(isinstance(item, bool) for item in vera_config.enable_vera_list) | ||
| ): | ||
| enable_vera_list = [vera_config.enable_vera_list] |
There was a problem hiding this comment.
应该是在vera_config层面就把enable_vera_list全部删除,因为我们不需要这个参数,我看现在代码还保留着?
| @@ -0,0 +1,15 @@ | |||
| { | |||
| "base_model_name_or_path": null, | |||
There was a problem hiding this comment.
测试用的,已删除,done
There was a problem hiding this comment.
已把vera_config层就把enable_vera_list全部删除

PR types
New features
PR changes
Add vera-pissa in peft/vera
Description
根据review意见修改
