From 31ccaf091641b991af885427eb3071a276ccc70e Mon Sep 17 00:00:00 2001
From: luotao1
Date: Mon, 11 Mar 2019 19:58:41 +0800
Subject: [PATCH 1/3] add all_kernels_must_compute_runtime_shape example for
 speedup infershape

test=develop
---
 paddle/fluid/framework/operator.cc                  | 11 +++++++++--
 .../operators/fused/fused_embedding_seq_pool_op.cc  | 11 ++++++++---
 paddle/fluid/operators/hash_op.cc                   | 11 ++++++++---
 .../operators/sequence_ops/sequence_enumerate_op.cc | 11 ++++++++---
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index df1689764d21fc..9f48b8cb9e72e7 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -926,8 +926,15 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(expected_kernel_key.place_);
   }
 
-  RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
-  this->InferShape(&infer_shape_ctx);
+  // If the Op has the attribute all_kernels_must_compute_runtime_shape,
+  // all kernels of this Op compute the runtime shape themselves, so
+  // infershape is skipped at runtime for speedup.
+  // TODO(luotao): Note that this is a temporary attribute; once all ops
+  // implement computing the runtime shape, it will be deleted.
+  if (!HasAttr("all_kernels_must_compute_runtime_shape")) {
+    RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
+    this->InferShape(&infer_shape_ctx);
+  }
   // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
   // not Scope. Imperative mode only pass inputs and get outputs.
   kernel_iter->second(
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index 80caf70b08e659..17a81d3e8805a5 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -23,9 +23,6 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    if (ctx->IsRuntime()) {
-      return;
-    }
     PADDLE_ENFORCE(ctx->HasInput("W"),
                    "Input W of FusedEmbeddingSeqPoolOp should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Ids"),
@@ -91,6 +88,14 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(boolean, default false) "
              "Sparse update.")
         .SetDefault(false);
+    AddAttr<bool>(
+        "all_kernels_must_compute_runtime_shape",
+        "(boolean, default true) "
+        "An attribute to speed up OperatorWithKernel::RunImpl. "
+        "If true, all the kernels of this Op compute the runtime shape "
+        "and skip infershape at runtime. Note that this is a temporary "
+        "attribute; please do NOT set it in the python layer.")
+        .SetDefault(true);
     AddComment(R"DOC(
 FusedEmbeddingSeqPool Operator.
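The operator.cc hunk above is the core of the change: RunImpl() now consults the op's attributes before running infershape, and the op makers below merely declare the opt-in flag. A minimal standalone sketch of that control flow, with FakeOp and ShapeCtx as hypothetical stand-ins for the framework types (not Paddle APIs):

  #include <iostream>
  #include <string>
  #include <unordered_set>

  struct ShapeCtx {};  // stand-in for RuntimeInferShapeContext

  class FakeOp {
   public:
    explicit FakeOp(std::unordered_set<std::string> attrs)
        : attrs_(std::move(attrs)) {}

    bool HasAttr(const std::string& name) const {
      return attrs_.count(name) > 0;
    }

    void Run() {
      // Same gate as the RunImpl() hunk above: ops that declare the
      // attribute skip the per-run InferShape() pass entirely.
      if (!HasAttr("all_kernels_must_compute_runtime_shape")) {
        ShapeCtx ctx;
        InferShape(&ctx);
      }
      Compute();
    }

   private:
    void InferShape(ShapeCtx*) const { std::cout << "infershape\n"; }
    // An opted-in op must resize its outputs in here instead.
    void Compute() const { std::cout << "compute\n"; }
    std::unordered_set<std::string> attrs_;
  };

  int main() {
    FakeOp plain({});                                         // runs infershape
    FakeOp fast({"all_kernels_must_compute_runtime_shape"});  // skips it
    plain.Run();
    fast.Run();
  }
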
diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc
index 7a29f80ff1ce41..b39eba081ec32e 100644
--- a/paddle/fluid/operators/hash_op.cc
+++ b/paddle/fluid/operators/hash_op.cc
@@ -26,9 +26,6 @@ class HashOp : public framework::OperatorWithKernel {
       : OperatorWithKernel(type, inputs, outputs, attrs) {}
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    if (ctx->IsRuntime()) {
-      return;
-    }
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of HashOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -57,6 +54,14 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
 )DOC");
     AddAttr<int>("num_hash", "").SetDefault(1);
     AddAttr<int>("mod_by", "").SetDefault(100000);
+    AddAttr<bool>(
+        "all_kernels_must_compute_runtime_shape",
+        "(boolean, default true) "
+        "An attribute to speed up OperatorWithKernel::RunImpl. "
+        "If true, all the kernels of this Op compute the runtime shape "
+        "and skip infershape at runtime. Note that this is a temporary "
+        "attribute; please do NOT set it in the python layer.")
+        .SetDefault(true);
   }
 };
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
index d3dcd1f96a986d..63e95e86544120 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
@@ -22,9 +22,6 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    if (ctx->IsRuntime()) {
-      return;
-    }
     PADDLE_ENFORCE(
         ctx->HasInput("X"),
         "Input(X) of SequecceEnumerate operator should not be null.");
@@ -62,6 +59,14 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker {
         });
     AddAttr<int>("pad_value", "(int) The enumerate sequence padding value.")
         .SetDefault(0);
+    AddAttr<bool>(
+        "all_kernels_must_compute_runtime_shape",
+        "(boolean, default true) "
+        "An attribute to speed up OperatorWithKernel::RunImpl. "
+        "If true, all the kernels of this Op compute the runtime shape "
+        "and skip infershape at runtime. Note that this is a temporary "
+        "attribute; please do NOT set it in the python layer.")
+        .SetDefault(true);
     AddComment(R"DOC(
 Sequence Enumerate Operator.
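With runtime infershape skipped, the shape work moves into every kernel of an opted-in op; patch 2 below does exactly that for the grad kernel. As a rough illustration of the contract (the Tensor struct here is a toy stand-in, not Paddle's framework::Tensor):

  #include <vector>

  // Toy tensor: just dims plus flat storage.
  struct Tensor {
    std::vector<long> dims;
    std::vector<float> data;
    void Resize(const std::vector<long>& d) { dims = d; }
  };

  // Kernel of an opted-in op: since no InferShape() ran before it, the
  // kernel itself must fix the output's runtime shape before writing data.
  void IdentityKernelCompute(const Tensor& x, Tensor* out) {
    out->Resize(x.dims);  // runtime shape, computed in the kernel
    out->data = x.data;
  }
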
From 5d20954ac4fb551897be710c643737931e3cb7c3 Mon Sep 17 00:00:00 2001
From: luotao1
Date: Tue, 12 Mar 2019 14:18:32 +0800
Subject: [PATCH 2/3] add runtime shape for fuse_emb_seq_pool_grad

test=develop
---
 paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
index 5e2e336e7117cc..4651c2b2ba81a4 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -121,6 +121,8 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
     auto *ids = context.Input<LoDTensor>("Ids");
     auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
     auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W"));
+    // runtime shape
+    d_table->set_height(table_dim[0]);
 
     auto *ids_data = ids->data<int64_t>();
     int64_t ids_num = ids->numel();

From fe78a92e6ea57a201f320c572d7f2ad6a0ff968a Mon Sep 17 00:00:00 2001
From: luotao1
Date: Tue, 12 Mar 2019 21:15:13 +0800
Subject: [PATCH 3/3] refine with comments

test=develop
---
 paddle/fluid/framework/operator.cc                  | 7 +------
 paddle/fluid/framework/operator.h                   | 9 +++++++++
 .../operators/fused/fused_embedding_seq_pool_op.cc  | 8 +-------
 paddle/fluid/operators/hash_op.cc                   | 8 +-------
 .../operators/sequence_ops/sequence_enumerate_op.cc | 8 +-------
 5 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 9f48b8cb9e72e7..e4bbcabea71ec9 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -926,12 +926,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     dev_ctx = pool.Get(expected_kernel_key.place_);
   }
 
-  // If the Op has the attribute all_kernels_must_compute_runtime_shape,
-  // all kernels of this Op compute the runtime shape themselves, so
-  // infershape is skipped at runtime for speedup.
-  // TODO(luotao): Note that this is a temporary attribute; once all ops
-  // implement computing the runtime shape, it will be deleted.
-  if (!HasAttr("all_kernels_must_compute_runtime_shape")) {
+  if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) {
     RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, ctx);
     this->InferShape(&infer_shape_ctx);
   }
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 55629636a81698..822bf5c9ceaa31 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -62,6 +62,15 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
 /// Variables with this suffix are the new Gradient.
 constexpr char kNewGradSuffix[] = "@NEWGRAD@";
 
+/// If an Op has this attribute, all its kernels should calculate the output
+/// variable's shape in the corresponding Compute() function, and
+/// OperatorWithKernel::RunImpl() will skip calling this Op's InferShape()
+/// at runtime, for speedup.
+/// TODO(luotao): Note that this temporary attribute will be deleted after
+/// all ops implement computing the runtime shape.
+constexpr char kAllKernelsMustComputeRuntimeShape[] =
+    "@ALL_KERNELS_MUST_COMPUTE_RUNTIME_SHAPE@";
+
 // define some kernel priority
 /* Define multiple kernel type fallback order*/
 extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index 17a81d3e8805a5..a0026427e25147 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -88,13 +88,7 @@ class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(boolean, default false) "
               "Sparse update.")
         .SetDefault(false);
-    AddAttr<bool>(
-        "all_kernels_must_compute_runtime_shape",
-        "(boolean, default true) "
-        "An attribute to speed up OperatorWithKernel::RunImpl. "
-        "If true, all the kernels of this Op compute the runtime shape "
-        "and skip infershape at runtime. Note that this is a temporary "
-        "attribute; please do NOT set it in the python layer.")
+    AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
         .SetDefault(true);
     AddComment(R"DOC(
 FusedEmbeddingSeqPool Operator.
diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc
index b39eba081ec32e..f6395fb32feac1 100644
--- a/paddle/fluid/operators/hash_op.cc
+++ b/paddle/fluid/operators/hash_op.cc
@@ -54,13 +54,7 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
 )DOC");
     AddAttr<int>("num_hash", "").SetDefault(1);
     AddAttr<int>("mod_by", "").SetDefault(100000);
-    AddAttr<bool>(
-        "all_kernels_must_compute_runtime_shape",
-        "(boolean, default true) "
-        "An attribute to speed up OperatorWithKernel::RunImpl. "
-        "If true, all the kernels of this Op compute the runtime shape "
-        "and skip infershape at runtime. Note that this is a temporary "
-        "attribute; please do NOT set it in the python layer.")
+    AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
         .SetDefault(true);
   }
 };
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
index 63e95e86544120..f357c9c08d042b 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
@@ -59,13 +59,7 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker {
         });
     AddAttr<int>("pad_value", "(int) The enumerate sequence padding value.")
         .SetDefault(0);
-    AddAttr<bool>(
-        "all_kernels_must_compute_runtime_shape",
-        "(boolean, default true) "
-        "An attribute to speed up OperatorWithKernel::RunImpl. "
-        "If true, all the kernels of this Op compute the runtime shape "
-        "and skip infershape at runtime. Note that this is a temporary "
-        "attribute; please do NOT set it in the python layer.")
+    AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
         .SetDefault(true);
     AddComment(R"DOC(
 Sequence Enumerate Operator.
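After patch 3, the opt-in is a single maker line referencing the shared constant; its "@...@"-delimited value follows the same convention as kGradVarSuffix and kNewGradSuffix, which also keeps it from colliding with any ordinary attribute name set from the python layer. A sketch of the resulting pattern for a hypothetical MyOpMaker (MyOp is not one of the three ops converted here, and the usual Paddle op registration boilerplate around it is assumed):

  class MyOpMaker : public framework::OpProtoAndCheckerMaker {
   public:
    void Make() override {
      AddInput("X", "(Tensor) Input of MyOp.");
      AddOutput("Out", "(Tensor) Output of MyOp.");
      // One line opts the op in: RunImpl() will skip InferShape() at
      // runtime, so every registered kernel of MyOp must compute its
      // output shapes inside Compute().
      AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, "")
          .SetDefault(true);
      AddComment("MyOp operator.");
    }
  };
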