Develop xiaobingw (PaddlePaddle#21)

XBWGC · web-flow · commit 305317cd6c58 · 2021-08-04T16:18:46.000+08:00
* apply lint

* add ipu_build_strategy

* del unused code

* add popart_canonicalization_utils to ipu_backend deps

* Update compiler.py

* Update ipu_build_strategy.cc

* apply lint

* add LowerConst

* update test case and resolve comments

* add TensorCopy(tensor_util.cc)
diff --git a/paddle/fluid/framework/ipu/ipu_backend.cc b/paddle/fluid/framework/ipu/ipu_backend.cc
@@ -70,6 +70,7 @@ void IpuBackend::Compile(ir::Graph* graph,
     }
   }
 
+  LowerWeights(graph);
   LowerBody(graph);
 
   VLOG(1) << "-- fetch_list --";
@@ -121,7 +122,7 @@ void IpuBackend::Prepare() {
 }
 
 void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
-                     std::vector<Tensor*>& outputs) {
+                     const std::vector<Tensor*>& outputs) {
   Prepare();
 
   std::map<popart::TensorId, popart::IArray&> popart_inputs;
@@ -171,6 +172,36 @@ std::vector<std::string> IpuBackend::GetOpInputs(const OpDesc* op) {
   return inputs;
 }
 
+// Registers every persistable graph variable found in scope_ as an initialized
+// popart input tensor and records the resulting id in tensors_ by name.
+void IpuBackend::LowerWeights(const ir::Graph* graph) {
+  PADDLE_ENFORCE_NOT_NULL(scope_,
+                          platform::errors::PreconditionNotMet(
+                              "You should call set_scope before LowerWeights"));
+
+  // NOTE(review): presumably the graph holds no optimizer-related states at
+  // this step -- confirm against the pass order.
+  for (const auto* node : graph->Nodes()) {
+    if (!node->IsVar() || node->IsCtrlVar() || !node->Var() ||
+        !node->Var()->Persistable()) {
+      continue;
+    }
+    auto var_name = node->Var()->Name();
+    const auto* var = scope_->FindVar(var_name);
+    if (var == nullptr) continue;  // not in scope: nothing to lower
+    // Bind by const reference; the tensor is only read, so skip the copy.
+    const auto& tensor = var->Get<framework::LoDTensor>();
+    auto dtype = VarType2PopartType(tensor.type());
+    std::vector<int64_t> shape;
+    for (int i = 0; i < tensor.dims().size(); ++i) {
+      shape.push_back(tensor.dims().at(i));
+    }
+    popart::TensorInfo tensor_info(dtype, shape);
+    popart::ConstVoidData const_data{tensor.data<void>(), tensor_info};
+    tensors_.emplace(var_name, builder_->addInitializedInputTensor(const_data));
+  }
+}
+
 void IpuBackend::LowerBody(const ir::Graph* graph) {
   auto nodes = TopologySortOperations(*graph);
   for (const auto* node : nodes) {
@@ -230,7 +261,8 @@ void IpuBackend::LowerBody(const ir::Graph* graph) {
           builder_->aiOnnxOpset11().reducemean(inputs, axes, keepdims);
       tensors_.emplace(outputs[0], result);
     } else {
-      PADDLE_THROW(platform::errors::Unimplemented("Unimplemented."));
+      PADDLE_THROW(platform::errors::Unimplemented("Unimplemented op type %s.",
+                                                   op_type));
     }
   }
 }
diff --git a/paddle/fluid/framework/ipu/ipu_backend.h b/paddle/fluid/framework/ipu/ipu_backend.h
@@ -15,6 +15,9 @@ limitations under the License. */
 #pragma once
 
 #include <map>
+#include <string>
+#include <vector>
+
 #include <popart/builder.hpp>
 #include <popart/dataflow.hpp>
 #include <popart/devicemanager.hpp>
@@ -24,12 +27,9 @@ limitations under the License. */
 #include <popart/sessionoptions.hpp>
 #include <popart/stepio.hpp>
 #include <popart/tensorinfo.hpp>
-#include <string>
-#include <unordered_set>
-#include <vector>
 
-#include "paddle/fluid/framework/ipu/ipu_build_strategy.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"
+#include "paddle/fluid/framework/ipu/ipu_build_strategy.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
@@ -49,13 +49,13 @@ struct Optimizer {
 
 class IpuBackend {
  public:
-  explicit IpuBackend();
+  IpuBackend();
 
   void Compile(ir::Graph *graph, const std::vector<std::string> &feed_list,
                const std::vector<std::string> &fetch_list);
 
   void Run(const std::vector<const Tensor *> &inputs,
-           std::vector<Tensor *> &outputs);
+           const std::vector<Tensor *> &outputs);
 
   std::string GetOptimizerType() { return optimizer_.type; }
 
@@ -74,9 +74,7 @@ class IpuBackend {
   }
 
   // SetScope, so we can get model parameters from scope
-  void SetScope(Scope* scope) {
-    scope_ = scope;
-  }
+  void SetScope(Scope *scope) { scope_ = scope; }
 
   static std::shared_ptr<IpuBackend> GetInstance() {
     if (NULL == instance_) {
@@ -87,6 +85,7 @@ class IpuBackend {
 
  private:
   void Prepare();
+  void LowerWeights(const ir::Graph *);
   void LowerBody(const ir::Graph *);
   std::vector<std::string> GetOpInputs(const OpDesc *op);
 
diff --git a/paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.cc b/paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.cc
@@ -33,7 +33,6 @@ namespace ir {
 
 void IpuRuntimeReplacerPass::ApplyImpl(ir::Graph* graph) const {
   VLOG(10) << "enter IpuRuntimeReplacerPass::ApplyImpl";
-
   VLOG(10) << "Raw Graph: ";
   VLOG(10) << DebugString(graph);
 
diff --git a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/ir/ipu/optimizer_extract_pass.h"
 
 #include "paddle/fluid/framework/ipu/ipu_backend.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 
 namespace paddle {
@@ -24,6 +25,10 @@ namespace ir {
 class Graph;
 
 void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
+  VLOG(10) << "enter IpuOptimizerExtractPass::ApplyImpl";
+  VLOG(10) << "Raw Graph: ";
+  VLOG(10) << DebugString(graph);
+
   auto ipu_backend = paddle::framework::IpuBackend::GetInstance();
 
   for (auto* node : graph->Nodes()) {
@@ -51,11 +56,15 @@ void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
       }
     }
   }
+
+  VLOG(10) << "Post Graph: ";
+  VLOG(10) << DebugString(graph);
+  VLOG(10) << "leave IpuOptimizerExtractPass::ApplyImpl";
 }
 
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
 
 REGISTER_PASS(optimizer_extract_pass,
-              paddle::framework::ir::IpuOptimizerExtractPass);
+              paddle::framework::ir::IpuOptimizerExtractPass);
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
@@ -74,6 +74,14 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
                  BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
   }
+
+#ifdef PADDLE_WITH_IPU
+  if (platform::is_cpu_place(src_place) && platform::is_ipu_place(dst_place)) {
+    memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
+                 BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
+  }
+#endif
+
 #ifdef PADDLE_WITH_XPU
   else if (platform::is_xpu_place(src_place) &&  // NOLINT
            platform::is_cpu_place(dst_place)) {
diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
@@ -515,7 +515,8 @@ def __init__(self, program, scope=None, ipu_build_strategy=None):
         self._backend = core.IpuBackend()
         self._backend.set_scope(self._scope)
         self._graph_passes = [
-            "optimizer_extract_pass", "forward_graph_extract_pass"
+            "optimizer_extract_pass", "forward_graph_extract_pass",
+            "popart_canonicalization_pass"
         ]
 
     def compile(self, feed_list, fetch_list, scope=None):
@@ -528,6 +529,11 @@ def compile(self, feed_list, fetch_list, scope=None):
         ipu_graph_builder_pass.set("fetch_list", fetch_list)
         ipu_graph_builder_pass.apply(self._graph)
 
+        ipu_runtime_replacer_pass = core.get_pass("ipu_runtime_replacer_pass")
+        ipu_runtime_replacer_pass.set("feed_list", feed_list)
+        ipu_runtime_replacer_pass.set("fetch_list", fetch_list)
+        ipu_runtime_replacer_pass.apply(self._graph)
+
         convert_pass = core.get_pass('graph_to_program_pass')
         desc = core.ProgramDesc()
         convert_pass.set_not_owned('program', desc)
diff --git a/python/paddle/fluid/tests/unittests/ipu/ipu_simple_add.py b/python/paddle/fluid/tests/unittests/ipu/ipu_simple_add.py
@@ -15,6 +15,7 @@
 import numpy as np
 import paddle
 import paddle.fluid.core as core
+import paddle.fluid.compiler as compiler
 from paddle.static import Program
 
 paddle.enable_static()
@@ -37,36 +38,10 @@
 main_prog = paddle.static.default_main_program()
 print(main_prog._to_readable_code())
 
-graph = core.Graph(main_prog.desc)
-
-print(graph)
-
-#graph_viz_pass = core.get_pass("graph_viz_pass")
-#graph_viz_path = "./test_viz_pass"
-#graph_viz_pass.set('graph_viz_path', graph_viz_path)
-#graph = graph_viz_pass.apply(graph)
-
 feed_list = ['a', 'b']
 fetch_list = ['tmp_0']
+program = compiler.IpuCompiler(main_prog).compile(feed_list, fetch_list)
 
-popart_canonicalization_pass = core.get_pass("popart_canonicalization_pass")
-popart_canonicalization_pass.apply(graph)
-
-ipu_graph_builder_pass = core.get_pass("ipu_graph_builder_pass")
-ipu_graph_builder_pass.set("feed_list", feed_list)
-ipu_graph_builder_pass.set("fetch_list", fetch_list)
-ipu_graph_builder_pass.apply(graph)
-
-ipu_runtime_replacer_pass = core.get_pass("ipu_runtime_replacer_pass")
-ipu_runtime_replacer_pass.set("feed_list", feed_list)
-ipu_runtime_replacer_pass.set("fetch_list", fetch_list)
-ipu_runtime_replacer_pass.apply(graph)
-
-convert_pass = core.get_pass('graph_to_program_pass')
-desc = core.ProgramDesc()
-convert_pass.set_not_owned('program', desc)
-convert_pass.apply(graph)
-program = Program._construct_from_desc(desc)
 print("Program to run:")
 print(program._to_readable_code())
 
diff --git a/python/paddle/fluid/tests/unittests/ipu/ipu_training_test.py b/python/paddle/fluid/tests/unittests/ipu/ipu_training_test.py
@@ -27,28 +27,28 @@
 adam.minimize(loss)
 
 # 运行期：先运行一次startup program初始化网络参数，然后调用飞桨的Executor和CompiledProgram API运行网络。
-place = paddle.IPUPlace()  # 使用何种设备运行网络，IPUPlace表示使用IPU运行
+place = paddle.IPUPlace(0)  # 使用何种设备运行网络，IPUPlace表示使用IPU运行
 executor = paddle.static.Executor(place)  # 创建执行器
 print("---------- startup_program --------------")
 prog = paddle.static.default_startup_program()
 print(prog._to_readable_code())
-executor.run(
-    paddle.static.default_startup_program())  # 运行startup program进行参数初始化
+executor.run(prog)  # 运行startup program进行参数初始化
+
 print("---------- main_program --------------")
 prog = paddle.static.default_main_program()
 print(prog._to_readable_code())
 
 # 再使用CompiledProgram编译网络，准备执行。
-compiled_program = paddle.static.CompiledProgram(
-    paddle.static.default_main_program())
+compiled_program = paddle.static.CompiledProgram(prog)
 
 BATCH_NUM = 2
 BATCH_SIZE = 32
 
 for batch_id in range(BATCH_NUM):
     input_image = np.random.random([BATCH_SIZE, 3, 224, 224]).astype('float32')
-    loss_numpy, = executor.run(
-        compiled_program, feed={'image': input_image}, fetch_list=[loss])
+    loss_numpy, = executor.run(compiled_program,
+                               feed={'image': input_image},
+                               fetch_list=[loss])
     print("Batch {}, loss = {}".format(batch_id, loss_numpy))
 
 # 关闭静态图模式