3 changes: 2 additions & 1 deletion paddle/fluid/API.spec
@@ -358,7 +358,7 @@ paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes',
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gt_score', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '4d170807a13d33925d1049d2892832bf'))
paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gt_score', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', 'eb62b1ff7cc981f3483a62321a491f2e'))
paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5566169a5ab993d177792c023c7fb340'))
paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
@@ -424,6 +424,7 @@ paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'loca
paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
paddle.fluid.contrib.extend_with_decoupled_weight_decay (ArgSpec(args=['base_optimizer'], varargs=None, keywords=None, defaults=None), ('document', 'a1095dfd4ec725747f662d69cd7659d4'))
paddle.fluid.contrib.mixed_precision.decorate (ArgSpec(args=['optimizer', 'init_loss_scaling', 'use_dynamic_loss_scaling'], varargs=None, keywords=None, defaults=(1.0, False)), ('document', '67e9bf14f345b38da169beb1ebb276eb'))
paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
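The second field of each API.spec entry appears to be a hash of the API's docstring, so the yolov3_loss line changes only because its documentation changed; the ArgSpec, and hence the Python signature, is identical. The other addition registers paddle.fluid.contrib.mixed_precision.decorate, whose ArgSpec records the defaults init_loss_scaling=1.0 and use_dynamic_loss_scaling=False.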
15 changes: 12 additions & 3 deletions paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
@@ -147,20 +147,29 @@ void FuseOptimizerOpPass::InitFusedGradsAndAllocSpaceForGrads(
vars.emplace(node->Var()->Name(), node);
}
}

// Set Gradients as Persistable to prevent these vars from being reused.
for (auto &grad_var_name : grads) {
auto iter = vars.find(grad_var_name);
PADDLE_ENFORCE(iter != vars.end());
PADDLE_ENFORCE_NOT_NULL(iter->second->Var());
PADDLE_ENFORCE(iter->second->Var()->GetType() == proto::VarType::LOD_TENSOR,
"Currently the gradient type should only be LoDTensor when "
"fusing optimizer ops.");
iter->second->Var()->SetPersistable(true);
}

// Init Grads
for (auto it = local_scopes.rbegin(); it != local_scopes.rend(); ++it) {
auto &scope = *it;
VLOG(6) << "Init: " << fused_grad_name;
PADDLE_ENFORCE(scope->FindVar(fused_grad_name) == nullptr,
"%s already exists in scope.", fused_grad_name);
scope->Var(fused_grad_name)->GetMutable<LoDTensor>();

for (auto &grad_var_name : grads) {
auto iter = vars.find(grad_var_name);
PADDLE_ENFORCE(iter != vars.end());
PADDLE_ENFORCE_NOT_NULL(iter->second->Var());
PADDLE_ENFORCE_EQ(iter->second->Var()->GetType(),
proto::VarType::LOD_TENSOR);
scope->Var(grad_var_name)->GetMutable<LoDTensor>();
}
}
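Two behaviors change in this pass. First, every gradient consumed by the fused optimizer is marked persistable and required to be a LoDTensor, so the memory-reuse passes cannot recycle those buffers mid-training. Second, initialization now walks local_scopes in reverse and creates both the fused gradient variable and each individual gradient variable in every local scope, enforcing that the fused variable does not already exist rather than assuming the variables are present.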
1 change: 1 addition & 0 deletions paddle/fluid/framework/details/reference_count_pass.cc
@@ -138,6 +138,7 @@ static ComputationOpHandle *FindNextComputationOpHandleOrReturnItself(
for (auto *pending_op : out_var->PendingOps()) {
if (visited.count(pending_op)) continue;
visited.insert(pending_op);
q.push(pending_op);
}
}
} while (!q.empty());
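This one-line fix completes a breadth-first search: pending ops were marked visited but never enqueued, so the do-while loop could not advance past the ops reachable in a single step. A minimal standalone sketch of the repaired pattern (the types and names here are illustrative, not Paddle's):

#include <queue>
#include <unordered_set>
#include <vector>

struct OpNode {
  std::vector<OpNode*> pending_ops;  // ops that consume this op's outputs
};

// Walk the op graph level by level, starting from `start`.
void VisitReachable(OpNode* start) {
  std::queue<OpNode*> q;
  std::unordered_set<OpNode*> visited;
  q.push(start);
  do {
    OpNode* op = q.front();
    q.pop();
    for (OpNode* pending : op->pending_ops) {
      if (visited.count(pending)) continue;
      visited.insert(pending);
      q.push(pending);  // the added line: without it, traversal stops early
    }
  } while (!q.empty());
}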
5 changes: 5 additions & 0 deletions paddle/fluid/framework/operator.cc
@@ -1023,6 +1023,7 @@ Scope* OperatorWithKernel::PrepareData(
std::vector<std::string>* transfered_inplace_vars,
RuntimeContext* ctx) const {
Scope* new_scope = nullptr;
if (!need_prepare_data_) return new_scope;

std::unordered_set<std::string> no_buffer_ins;
if (info_) {
@@ -1115,6 +1116,10 @@ Scope* OperatorWithKernel::PrepareData(
SetTensorToVariable(*var, out, trans_var);
}
}
// If new_scope == nullptr, no input of this Op required TransformData, so
// PrepareData can be skipped on the remaining runs of this Op to save the
// time spent re-checking every input.
if (!new_scope) need_prepare_data_ = false;

return new_scope;
}
1 change: 1 addition & 0 deletions paddle/fluid/framework/operator.h
@@ -506,6 +506,7 @@ class OperatorWithKernel : public OperatorBase {
mutable std::unique_ptr<OpKernelFunc> kernel_func_;
mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
mutable const Scope* pre_scope_ = nullptr;
mutable bool need_prepare_data_ = true;
mutable bool enable_cache_runtime_context = false;
mutable bool enable_cache_expected_kernel = false;
mutable bool all_kernels_must_compute_runtime_shape = false;
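Taken together, the operator.cc and operator.h hunks add a one-shot fast path: when one full pass over the inputs triggers no TransformData (new_scope stays nullptr), the op sets need_prepare_data_ to false and skips PrepareData on all later runs. This is safe only under the assumption, implied by the comment above, that an input needing no transform on the first run never needs one later. A condensed sketch of the control flow (the class and return type are illustrative):

class KernelOpSketch {
 public:
  const void* PrepareData() const {
    if (!need_prepare_data_) return nullptr;  // fast path on later runs
    const void* new_scope = nullptr;
    // ... examine each input; new_scope would be set only when some
    // input actually requires a layout/place/dtype transform ...
    if (new_scope == nullptr) need_prepare_data_ = false;  // never re-check
    return new_scope;
  }

 private:
  mutable bool need_prepare_data_ = true;  // mutable: flipped in a const method
};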
8 changes: 6 additions & 2 deletions paddle/fluid/imperative/layer.cc
@@ -336,11 +336,15 @@ void OpBase::InvokeBackwardHooks() {
}
}

void OpBase::RegisterBackwardHooks(const py::object& callable) {
void OpBase::RegisterBackwardHooks(const py::object& callable, bool front) {
VLOG(3) << "Register backward hooks " << trace_id_;

// TODO(minqiyang): check the callable format
backward_hooks_.push_back(callable);
if (front) {
backward_hooks_.insert(backward_hooks_.begin(), callable);
} else {
backward_hooks_.push_back(callable);
}
}

void VarBase::RunBackward() {
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/layer.h
@@ -310,7 +310,7 @@ class PYBIND11_HIDDEN OpBase {
return grad_op_descs_[index]->Type();
}

void RegisterBackwardHooks(const py::object& callable);
void RegisterBackwardHooks(const py::object& callable, bool front = false);

void InvokeBackwardHooks();

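The new front parameter only controls where the callable lands in backward_hooks_, and therefore, assuming hooks are invoked in vector order, when it fires: a hook inserted at the front runs before every previously registered hook. A standalone sketch of the ordering semantics, with std::function standing in for py::object:

#include <functional>
#include <vector>

std::vector<std::function<void()>> backward_hooks_;

void RegisterBackwardHooks(const std::function<void()>& callable,
                           bool front = false) {
  if (front) {
    backward_hooks_.insert(backward_hooks_.begin(), callable);  // fires first
  } else {
    backward_hooks_.push_back(callable);  // appended: fires last
  }
}
// Registering hook a, then hook b with front = true, yields the order {b, a}.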
65 changes: 65 additions & 0 deletions paddle/fluid/operators/activation_op.cc
@@ -619,6 +619,28 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
}
};

class LeakyReluDoubleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
if (ctx->HasOutput("DX")) {
ctx->ShareDim("X", "DX");
ctx->ShareLoD("X", "DX");
}
if (ctx->HasOutput("DDOut")) {
ctx->ShareDim("X", "DDOut");
ctx->ShareLoD("X", "DDOut");
}
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "DDX");
}
};

//
// ReluGrad: dx = dy if y >= 0 else 0
// ReluGradGrad: ddy = ddx if y >= 0 else 0
@@ -643,6 +665,29 @@ class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
}
};

// leaky_relu Grad: dx = dy if x >= 0 else alpha * dy
// leaky_relu GradGrad: ddy = ddx if x >= 0 else alpha * ddx
class LeakyReluDoubleGradMaker
: public ::paddle::framework::SingleGradOpDescMaker {
public:
using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

protected:
std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
auto* op = new ::paddle::framework::OpDesc();
op->SetType("leaky_relu_grad_grad");
// input1: X
op->SetInput("X", Input("X"));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(Attrs());
// X@GRAD: dx
op->SetOutput("DX", InputGrad("X"));
// Out@GRAD@GRAD: ddy
op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
return std::unique_ptr<::paddle::framework::OpDesc>(op);
}
};

} // namespace operators
} // namespace paddle

@@ -699,3 +744,23 @@ REGISTER_OP_CPU_KERNEL(
ops::ReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<plat::float16>>);

REGISTER_OPERATOR(
leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpDescMaker<ops::LeakyReluGradFunctor<float>::FwdDeps()>,
paddle::framework::SingleOpInplaceInToOut);
REGISTER_OPERATOR(leaky_relu_grad, ops::ActivationOpGrad,
paddle::framework::SingleOpInplaceInToOut,
ops::LeakyReluDoubleGradMaker);
REGISTER_OPERATOR(leaky_relu_grad_grad, ops::LeakyReluDoubleGrad);
REGISTER_ACTIVATION_CPU_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
LeakyReluGradFunctor);
REGISTER_OP_CPU_KERNEL(
leaky_relu_grad_grad,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::LeakyReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::LeakyReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<
plat::CPUDeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);
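leaky_relu is now registered on its own instead of through the shared activation macro (its entry is dropped from the macro list in activation_op.h below): the forward op keeps the in-place input-to-output optimization, leaky_relu_grad gains LeakyReluDoubleGradMaker, and the new leaky_relu_grad_grad operator gets CPU kernels for float, double, and float16. The CUDA file repeats the same registrations for GPU.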
12 changes: 12 additions & 0 deletions paddle/fluid/operators/activation_op.cu
@@ -33,6 +33,18 @@ namespace plat = paddle::platform;

FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);

REGISTER_ACTIVATION_CUDA_KERNEL(leaky_relu, LeakyRelu, LeakyReluFunctor,
LeakyReluGradFunctor);

REGISTER_OP_CUDA_KERNEL(
leaky_relu_grad_grad,
ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
ops::LeakyReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
ops::LeakyReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<
plat::CUDADeviceContext, ops::LeakyReluGradGradFunctor<plat::float16>>);

REGISTER_ACTIVATION_CUDA_KERNEL(relu, Relu, ReluFunctor, ReluGradFunctor);

REGISTER_OP_CUDA_KERNEL(
79 changes: 59 additions & 20 deletions paddle/fluid/operators/activation_op.h
@@ -545,7 +545,7 @@ struct ZeroGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0) / out;
dx.device(d) = static_cast<T>(0) * out;
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return kNoDeps; }
@@ -1208,45 +1208,31 @@ inline void ExtractActivationDoubleGradTensor(
const framework::Tensor** Out, const framework::Tensor** ddX,
framework::Tensor** dX, framework::Tensor** dOut,
framework::Tensor** ddOut) {
auto out_var = ctx.InputVar("Out");
auto ddx_var = ctx.InputVar("DDX");
auto ddo_var = ctx.OutputVar("DDOut");
auto do_var = ctx.OutputVar("DOut");
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input Variable Out, variable name = %s",
ctx.op().Input("Out"));
PADDLE_ENFORCE(ddx_var != nullptr,
"Cannot get input Variable %s, variable name = %s", "DDX",
"Cannot get input Variable Out, variable name = %s",
ctx.op().Input("DDX"));
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
*Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
*ddX = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*ddx_var);
if (ddo_var) {
*ddOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
ddo_var);
}
if (do_var) {
*dOut = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
do_var);
}
} else {
*Out = ctx.Input<framework::Tensor>("Out");
*ddX = ctx.Input<framework::Tensor>("DDX");
if (ddo_var) {
*ddOut = ctx.Output<framework::Tensor>("DDOut");
}
if (do_var) {
*dOut = ctx.Output<framework::Tensor>("DOut");
}
}
PADDLE_ENFORCE(*ddX != nullptr,
"Cannot get output tensor %s, variable name = %s", "DDX",
"Cannot get output tensor DDX, variable name = %s",
ctx.op().Output("DDX"));

if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) {
auto x_var = ctx.InputVar("X");
PADDLE_ENFORCE(x_var != nullptr,
"Cannot get input tensor X, variable name = %s",
"Cannot get input Variable Out, variable name = %s",
ctx.op().Input("X"));
auto dx_var = ctx.OutputVar("DX");
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
@@ -1262,9 +1248,33 @@ }
}
}
} else {
VLOG(10) << " Inplace activation of Op : " << ctx.op().Type();
VLOG(10) << "Inplace activation of Op: " << ctx.op().Type();
*X = *ddX;
}
if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) {
auto out_var = ctx.InputVar("Out");
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input tensor Out, variable name = %s",
ctx.op().Input("Out"));
auto dout_var = ctx.OutputVar("DOut");
if (CanBeUsedBySelectedRows.count(ctx.op().Type())) {
*Out =
paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
if (dout_var) {
*dOut =
paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
dout_var);
}
} else {
*Out = ctx.Input<framework::Tensor>("Out");
if (dout_var) {
*dOut = ctx.Output<framework::Tensor>("DOut");
}
}
} else {
VLOG(10) << "Inplace activation of Op: " << ctx.op().Type();
*Out = *ddX;
}
}

template <typename DeviceContext, typename Functor>
@@ -1318,6 +1328,36 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};

template <typename T>
struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* X,
const framework::Tensor* Out, const framework::Tensor* ddX,
framework::Tensor* ddOut, framework::Tensor* dOut,
framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
ddout.device(*d) = ddx *
((x >= static_cast<T>(0)).template cast<T>().eval() +
static_cast<T>(alpha) *
(x < static_cast<T>(0)).template cast<T>().eval())
.template cast<T>();
}
if (dX) {
auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
dx.device(*d) = dx.constant(static_cast<T>(0));
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};

} // namespace operators
} // namespace paddle

@@ -1349,7 +1389,6 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyRelu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELU, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrink, HardShrinkFunctor, HardShrinkGradFunctor); \
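Three changes land in this header. ZeroGradFunctor now computes 0 * out instead of 0 / out, avoiding NaNs when out contains zeros. ExtractActivationDoubleGradTensor fetches Out and DOut only when the functor's FwdDeps() includes kDepOut, mirroring the existing kDepX branch, so x-dependent ops such as leaky_relu can run their double grad without an Out input. Finally, LeakyReluGradGradFunctor implements ddout = ddx * (1 if x >= 0 else alpha) and zeroes DX, since the second derivative of leaky_relu is zero wherever it is defined. A self-contained numeric check of that formula (no Paddle dependencies; the alpha and test values are arbitrary):

#include <cstdio>

int main() {
  const float alpha = 0.02f;               // negative-slope attribute
  const float ddx = 5.0f;                  // incoming grad-of-grad value
  const float xs[] = {-3.0f, 0.0f, 2.5f};  // sample forward inputs
  for (float x : xs) {
    float ddout = ddx * (x >= 0.0f ? 1.0f : alpha);
    std::printf("x=%5.2f -> ddout=%.2f\n", x, ddout);
  }
  // prints: x=-3.00 -> ddout=0.10, x= 0.00 -> ddout=5.00, x= 2.50 -> ddout=5.00
  return 0;
}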