Make fuse_optimizer_op_pass also work when the model contains sparse gradients.#18664
Conversation
test=develop
test=develop
test=develop
test=develop
test=develop
7bdea4e to
4a73988
Compare
test=develop
4a73988 to
419f342
Compare
| result.Get<details::ParamsAndGrads>(details::kParamsAndSparseGrads); | ||
|
|
||
| for (auto &param_grad : params_grads) { | ||
| if (IsSupportedVarType(GetTypeOfVar(vars_info, param_grad.second))) { |
There was a problem hiding this comment.
IsLodTensorVarType or IsDenseGradVarType
| if (node->Op()->Type() == fuse_op_type) { | ||
| auto grad_name = node->Op()->Input(kGrad); | ||
| PADDLE_ENFORCE_EQ(grad_name.size(), static_cast<size_t>(1)); | ||
| if (GetTypeOfVar(vars_info, grad_name[0]) == proto::VarType::LOD_TENSOR) {
| const std::string prefix(details::kFusedVarNamePrefix); | ||
| // NOTE: the fused_var_name should be unique. | ||
| for (auto &var_name : aux_var_names) { | ||
| // NOTE: the fused_var_name should be unique. |
There was a problem hiding this comment.
Line 81 is used to check this.
test=develop
| const std::string prefix(details::kFusedVarNamePrefix); | ||
| // NOTE: the fused_var_name should be unique. | ||
| for (auto &var_name : aux_var_names) { | ||
| // NOTE: the fused_var_name should be unique. |
There was a problem hiding this comment.
Line 81 is used to check this.
| "The VarDescs of persistable variable are not consistent."); | ||
| PADDLE_ENFORCE(graph == native_graph, | ||
| "Pass::Apply() cannot delete the passed graph and shouldn't " | ||
| "return a new graph.(For the need of pybind11)"); |
There was a problem hiding this comment.
This check is unnecessary.
| result.Get<details::ParamsAndGrads>(details::kParamsAndSparseGrads); | ||
|
|
||
| for (auto &param_grad : params_grads) { | ||
| if (IsSupportedVarType(GetTypeOfVar(vars_info, param_grad.second))) { |
| if (node->Op()->Type() == fuse_op_type) { | ||
| auto grad_name = node->Op()->Input(kGrad); | ||
| PADDLE_ENFORCE_EQ(grad_name.size(), static_cast<size_t>(1)); | ||
| if (GetTypeOfVar(vars_info, grad_name[0]) == proto::VarType::LOD_TENSOR) {
| if (result.Has(details::kParamsAndGrads)) { | ||
| auto &params_grads = | ||
| result.Get<details::ParamsAndGrads>(details::kParamsAndGrads); | ||
| if (result.Has(details::kParamsAndDenseGrads)) { |
There was a problem hiding this comment.
This nested if is too long.
864393f to
3d011e7
Compare
test=develop
095c018 to
464b882
Compare
464b882 to
126d0a0
Compare
test=develop
| } | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Define the fused variables in the local execution scope.
Because for some model, there may be more than one program, and those programs may share some parameters, for the previous strategy, the gradients of the shared parameters of those programs are also shared, But this is somewhat problematic, so we should define those fused variables of gradients in the local execution scope.
There was a problem hiding this comment.
Copy these line to Comments may be better
And which is the unit test?
test=develop
Uh oh!
There was an error while loading. Please reload this page.