From 31ccaf091641b991af885427eb3071a276ccc70e Mon Sep 17 00:00:00 2001
From: luotao1
Date: Mon, 11 Mar 2019 19:58:41 +0800
Subject: [PATCH 1/3] add all_kernels_must_compute_runtime_shape example for
 speedup infershape

test=develop
---
 paddle/fluid/framework/operator.cc                  | 11 +++++++++--
 .../operators/fused/fused_embedding_seq_pool_op.cc  | 11 ++++++++---
 paddle/fluid/operators/hash_op.cc                   | 11 ++++++++---
 .../operators/sequence_ops/sequence_enumerate_op.cc | 11 ++++++++---
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index df1689764d21fc..9f48b8cb9e72e7 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -926,8 +926,15 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(expected_kernel_key.place_);
   }
 
-  RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
-  this->InferShape(&infer_shape_ctx);
+  // If the Op has the attribute all_kernels_must_compute_runtime_shape,
+  // all kernels of this Op compute the runtime shape themselves, so
+  // infershape is skipped at runtime for speedup.
+  // TODO(luotao): Note that this is a temporary attribute; once all ops
+  // implement computing the runtime shape, it will be deleted.
+  if (!HasAttr("all_kernels_must_compute_runtime_shape")) {
+    RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
+    this->InferShape(&infer_shape_ctx);
+  }
   // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
   // not Scope. Imperative mode only pass inputs and get outputs.
   kernel_iter->second(
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index 80caf70b08e659..17a81d3e8805a5 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -23,9 +23,6 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    if (ctx->IsRuntime()) {
-      return;
-    }
     PADDLE_ENFORCE(ctx->HasInput("W"),
                    "Input W of FusedEmbeddingSeqPoolOp should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Ids"),
@@ -91,6 +88,14 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(boolean, default false) "
              "Sparse update.")
         .SetDefault(false);
+    AddAttr<bool>(
+        "all_kernels_must_compute_runtime_shape",
+        "(boolean, default true) "
+        "An attribute to speed up OperatorWithKernel::RunImpl. "
+        "If true, all the kernels of this Op compute the runtime shape "
+        "and skip infershape at runtime. Note that this is a temporary "
+        "attribute; please do NOT set it in the python layer.")
+        .SetDefault(true);
     AddComment(R"DOC(
 FusedEmbeddingSeqPool Operator.
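The operator.cc hunk above is the core of the change: RunImpl() now consults the op's attributes before running infershape, and the op makers below merely declare the opt-in flag. A minimal standalone sketch of that control flow, with FakeOp and ShapeCtx as hypothetical stand-ins for the framework types (not Paddle APIs):

  #include <iostream>
  #include <string>
  #include <unordered_set>

  struct ShapeCtx {};  // stand-in for RuntimeInferShapeContext

  class FakeOp {
   public:
    explicit FakeOp(std::unordered_set<std::string> attrs)
        : attrs_(std::move(attrs)) {}

    bool HasAttr(const std::string& name) const {
      return attrs_.count(name) > 0;
    }

    void Run() {
      // Same gate as the RunImpl() hunk above: ops that declare the
      // attribute skip the per-run InferShape() pass entirely.
      if (!HasAttr("all_kernels_must_compute_runtime_shape")) {
        ShapeCtx ctx;
        InferShape(&ctx);
      }
      Compute();
    }

   private:
    void InferShape(ShapeCtx*) const { std::cout << "infershape\n"; }
    // An opted-in op must resize its outputs in here instead.
    void Compute() const { std::cout << "compute\n"; }
    std::unordered_set<std::string> attrs_;
  };

  int main() {
    FakeOp plain({});                                         // runs infershape
    FakeOp fast({"all_kernels_must_compute_runtime_shape"});  // skips it
    plain.Run();
    fast.Run();
  }
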
diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc
index 7a29f80ff1ce41..b39eba081ec32e 100644
--- a/paddle/fluid/operators/hash_op.cc
+++ b/paddle/fluid/operators/hash_op.cc
@@ -26,9 +26,6 @@ class HashOp : public framework::OperatorWithKernel {
       : OperatorWithKernel(type, inputs, outputs, attrs) {}
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    if (ctx->IsRuntime()) {
-      return;
-    }
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of HashOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -57,6 +54,14 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
 )DOC");
     AddAttr<int>("num_hash", "").SetDefault(1);
     AddAttr<int>("mod_by", "").SetDefault(100000);
+    AddAttr<bool>(
+        "all_kernels_must_compute_runtime_shape",
+        "(boolean, default true) "
+        "An attribute to speed up OperatorWithKernel::RunImpl. "
+        "If true, all the kernels of this Op compute the runtime shape "
+        "and skip infershape at runtime. Note that this is a temporary "
+        "attribute; please do NOT set it in the python layer.")
+        .SetDefault(true);
   }
 };
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
index d3dcd1f96a986d..63e95e86544120 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
@@ -22,9 +22,6 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    if (ctx->IsRuntime()) {
-      return;
-    }
     PADDLE_ENFORCE(
         ctx->HasInput("X"),
         "Input(X) of SequecceEnumerate operator should not be null.");
@@ -62,6 +59,14 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker {
         });
     AddAttr<int>("pad_value", "(int) The enumerate sequence padding value.")
         .SetDefault(0);
+    AddAttr<bool>(
+        "all_kernels_must_compute_runtime_shape",
+        "(boolean, default true) "
+        "An attribute to speed up OperatorWithKernel::RunImpl. "
+        "If true, all the kernels of this Op compute the runtime shape "
+        "and skip infershape at runtime. Note that this is a temporary "
+        "attribute; please do NOT set it in the python layer.")
+        .SetDefault(true);
     AddComment(R"DOC(
 Sequence Enumerate Operator.
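With runtime infershape skipped, the shape work moves into every kernel of an opted-in op; patch 2 below does exactly that for the grad kernel. As a rough illustration of the contract (the Tensor struct here is a toy stand-in, not Paddle's framework::Tensor):

  #include <vector>

  // Toy tensor: just dims plus flat storage.
  struct Tensor {
    std::vector<long> dims;
    std::vector<float> data;
    void Resize(const std::vector<long>& d) { dims = d; }
  };

  // Kernel of an opted-in op: since no InferShape() ran before it, the
  // kernel itself must fix the output's runtime shape before writing data.
  void IdentityKernelCompute(const Tensor& x, Tensor* out) {
    out->Resize(x.dims);  // runtime shape, computed in the kernel
    out->data = x.data;
  }
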
From 5d20954ac4fb551897be710c643737931e3cb7c3 Mon Sep 17 00:00:00 2001
From: luotao1
Date: Tue, 12 Mar 2019 14:18:32 +0800
Subject: [PATCH 2/3] add runtime shape for fuse_emb_seq_pool_grad

test=develop
---
 paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
index 5e2e336e7117cc..4651c2b2ba81a4 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -121,6 +121,8 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
     auto *ids = context.Input<LoDTensor>("Ids");
     auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
     auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W"));
+    // runtime shape
+    d_table->set_height(table_dim[0]);
 
     auto *ids_data = ids->data<int64_t>();
     int64_t ids_num = ids->numel();

From fe78a92e6ea57a201f320c572d7f2ad6a0ff968a Mon Sep 17 00:00:00 2001
From: luotao1
Date: Tue, 12 Mar 2019 21:15:13 +0800
Subject: [PATCH 3/3] refine with comments

test=develop
---
 paddle/fluid/framework/operator.cc                  | 7 +------
 paddle/fluid/framework/operator.h                   | 9 +++++++++
 .../operators/fused/fused_embedding_seq_pool_op.cc  | 8 +-------
 paddle/fluid/operators/hash_op.cc                   | 8 +-------
 .../operators/sequence_ops/sequence_enumerate_op.cc | 8 +-------
 5 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 9f48b8cb9e72e7..e4bbcabea71ec9 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -926,12 +926,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(expected_kernel_key.place_);
   }
 
-  // If the Op has the attribute all_kernels_must_compute_runtime_shape,
-  // all kernels of this Op compute the runtime shape themselves, so
-  // infershape is skipped at runtime for speedup.
-  // TODO(luotao): Note that this is a temporary attribute; once all ops
-  // implement computing the runtime shape, it will be deleted.
-  if (!HasAttr("all_kernels_must_compute_runtime_shape")) {
+  if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) {
     RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
     this->InferShape(&infer_shape_ctx);
   }
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 55629636a81698..822bf5c9ceaa31 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -62,6 +62,15 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
 /// Variables with this suffix are the new Gradient.
 constexpr char kNewGradSuffix[] = "@NEWGRAD@";
 
+/// If an Op has this attribute, all its kernels should calculate the output
+/// variable's shape in the corresponding Compute() function, and
+/// OperatorWithKernel::RunImpl() will skip calling this Op's InferShape()
+/// at runtime, for speedup.
+/// TODO(luotao): Note that this temporary attribute will be deleted after
+/// all ops implement computing the runtime shape.
+constexpr char kAllKernelsMustComputeRuntimeShape[] =
+    "@ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE@";
+
 // define some kernel priority
 /* Define multiple kernel type fallback order*/
 extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index 17a81d3e8805a5..a0026427e25147 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -88,13 +88,7 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(boolean, default false) "
               "Sparse update.")
         .SetDefault(false);
-    AddAttr<bool>(
-        "all_kernels_must_compute_runtime_shape",
-        "(boolean, default true) "
-        "An attribute to speed up OperatorWithKernel::RunImpl. "
-        "If true, all the kernels of this Op compute the runtime shape "
-        "and skip infershape at runtime. Note that this is a temporary "
-        "attribute; please do NOT set it in the python layer.")
+    AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
         .SetDefault(true);
     AddComment(R"DOC(
 FusedEmbeddingSeqPool Operator.
diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc
index b39eba081ec32e..f6395fb32feac1 100644
--- a/paddle/fluid/operators/hash_op.cc
+++ b/paddle/fluid/operators/hash_op.cc
@@ -54,13 +54,7 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
 )DOC");
     AddAttr<int>("num_hash", "").SetDefault(1);
     AddAttr<int>("mod_by", "").SetDefault(100000);
-    AddAttr<bool>(
-        "all_kernels_must_compute_runtime_shape",
-        "(boolean, default true) "
-        "An attribute to speed up OperatorWithKernel::RunImpl. "
-        "If true, all the kernels of this Op compute the runtime shape "
-        "and skip infershape at runtime. Note that this is a temporary "
-        "attribute; please do NOT set it in the python layer.")
+    AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
         .SetDefault(true);
   }
 };
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
index 63e95e86544120..f357c9c08d042b 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
@@ -59,13 +59,7 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker {
         });
     AddAttr<int>("pad_value", "(int) The enumerate sequence padding value.")
         .SetDefault(0);
-    AddAttr<bool>(
-        "all_kernels_must_compute_runtime_shape",
-        "(boolean, default true) "
-        "An attribute to speed up OperatorWithKernel::RunImpl. "
-        "If true, all the kernels of this Op compute the runtime shape "
-        "and skip infershape at runtime. Note that this is a temporary "
-        "attribute; please do NOT set it in the python layer.")
+    AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
         .SetDefault(true);
     AddComment(R"DOC(
 Sequence Enumerate Operator.
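After patch 3, the opt-in is a single maker line referencing the shared constant; its "@...@"-delimited value follows the same convention as kGradVarSuffix and kNewGradSuffix, which also keeps it from colliding with any ordinary attribute name set from the python layer. A sketch of the resulting pattern for a hypothetical MyOpMaker (MyOp is not one of the three ops converted here, and the usual Paddle op registration boilerplate around it is assumed):

  class MyOpMaker : public framework::OpProtoAndCheckerMaker {
   public:
    void Make() override {
      AddInput("X", "(Tensor) Input of MyOp.");
      AddOutput("Out", "(Tensor) Output of MyOp.");
      // One line opts the op in: RunImpl() will skip InferShape() at
      // runtime, so every registered kernel of MyOp must compute its
      // output shapes inside Compute().
      AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
          .SetDefault(true);
      AddComment("MyOp operator.");
    }
  };
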