diff --git a/lite/core/mir/control_flow_op_shared_inputs_and_outputs_place_sync_pass.cc b/lite/core/mir/control_flow_op_shared_inputs_and_outputs_place_sync_pass.cc index bb166ab222b..283c4b51c14 100644 --- a/lite/core/mir/control_flow_op_shared_inputs_and_outputs_place_sync_pass.cc +++ b/lite/core/mir/control_flow_op_shared_inputs_and_outputs_place_sync_pass.cc @@ -64,6 +64,8 @@ void ControlFlowOpSharedInputsAndOutputsPlaceSyncPass::Apply( var_name, var_node->AsArg().type)); } } + + // sync input var for (auto& sub_op_node : (*graphs_)[sub_block_idx]->StmtTopologicalOrder()) { if (!sub_op_node->IsStmt()) continue; @@ -71,9 +73,19 @@ void ControlFlowOpSharedInputsAndOutputsPlaceSyncPass::Apply( CheckAndSyncTypeOfVarNode(sub_var_node, ref_var_types); } for (auto* sub_var_node : sub_op_node->outlinks) { - CheckAndSyncTypeOfVarNode(sub_var_node, ref_var_types); + auto& var_name = sub_var_node->AsArg().name; + if (!ref_var_types.count(var_name)) { + ref_var_types.insert(std::pair( + var_name, sub_var_node->AsArg().type)); + } } } + + // sync output var + for (auto* var_node : op_node->outlinks) { + CHECK(var_node->IsArg()); + CheckAndSyncTypeOfVarNode(var_node, ref_var_types); + } } } diff --git a/lite/core/mir/io_copy_kernel_pick_pass.cc b/lite/core/mir/io_copy_kernel_pick_pass.cc index 9e01632d28e..29a33ff0260 100644 --- a/lite/core/mir/io_copy_kernel_pick_pass.cc +++ b/lite/core/mir/io_copy_kernel_pick_pass.cc @@ -33,6 +33,8 @@ class IoCopyKernelPickPass : public StmtPass { CHECK(!kernels.empty()) << "No valid kernels found for IoCopy Op"; const auto* inty = node.inlinks.front()->AsArg().type; const auto* outy = node.outlinks.front()->AsArg().type; + CHECK((inty->IsTensor() && outy->IsTensor()) || + (inty->IsTensorList() && outy->IsTensorList())); LOG(INFO) << "input type " << *inty; LOG(INFO) << "output type " << *outy; @@ -42,10 +44,18 @@ class IoCopyKernelPickPass : public StmtPass { CHECK_EQ(node.inlinks.size(), 1UL); CHECK_EQ(node.outlinks.size(), 1UL); - const Type* in_arg_ty = kernel->GetInputDeclType("Input"); - const Type* out_arg_ty = kernel->GetOutputDeclType("Out"); + const Type* in_arg_ty = nullptr; + const Type* out_arg_ty = nullptr; + if (inty->IsTensor()) { + in_arg_ty = kernel->GetInputDeclType("Input"); + out_arg_ty = kernel->GetOutputDeclType("Out"); + } else { + in_arg_ty = kernel->GetInputDeclType("InputArray"); + out_arg_ty = kernel->GetOutputDeclType("OutArray"); + } LOG(INFO) << "checking kernel candidate " << *in_arg_ty << "->" << *out_arg_ty; + if (TargetCompatibleTo(*inty, *in_arg_ty)) { // Both the input and output type matches, remove other kernels // directly. diff --git a/lite/core/mir/type_target_cast_pass.cc b/lite/core/mir/type_target_cast_pass.cc index a1be6c64dc2..e4286a6ddb4 100644 --- a/lite/core/mir/type_target_cast_pass.cc +++ b/lite/core/mir/type_target_cast_pass.cc @@ -124,8 +124,17 @@ void TypeTargetTransformPass::AddIoCopyInst( // to.target() // The precision and layout should be equal to from.precision(), // from.layout() - io_copy_output_arg->AsArg().type = - LiteType::GetTensorTy(to.target(), from.precision(), from.layout()); + bool is_tensor = from.IsTensor(); + if (!is_tensor) { + CHECK(from.IsTensorList()) << "only support tensor or tensor_array."; + } + if (is_tensor) { + io_copy_output_arg->AsArg().type = + LiteType::GetTensorTy(to.target(), from.precision(), from.layout()); + } else { + io_copy_output_arg->AsArg().type = LiteType::GetTensorListTy( + to.target(), from.precision(), from.layout()); + } auto* io_copy_inst = graph->NewInstructNode(); bool in_persist = in->AsArg().is_weight || in->AsArg().is_persist; @@ -141,8 +150,13 @@ void TypeTargetTransformPass::AddIoCopyInst( // Create IoCopy Instruction. cpp::OpDesc op_desc; op_desc.SetType(io_copy_type); - op_desc.SetInput("Input", {in->AsArg().name}); - op_desc.SetOutput("Out", {io_copy_output_name}); + if (is_tensor) { + op_desc.SetInput("Input", {in->AsArg().name}); + op_desc.SetOutput("Out", {io_copy_output_name}); + } else { + op_desc.SetInput("InputArray", {in->AsArg().name}); + op_desc.SetOutput("OutArray", {io_copy_output_name}); + } io_copy_op->Attach(op_desc, inst_node->AsStmt().op()->scope()); auto kernels = io_copy_op->CreateKernels(valid_places); @@ -150,8 +164,15 @@ void TypeTargetTransformPass::AddIoCopyInst( bool is_found = false; std::vector> selected_kernels; for (auto& kernel : kernels) { - const Type* in_arg_ty = kernel->GetInputDeclType("Input"); - const Type* out_arg_ty = kernel->GetOutputDeclType("Out"); + const Type* in_arg_ty = nullptr; + const Type* out_arg_ty = nullptr; + if (is_tensor) { + in_arg_ty = kernel->GetInputDeclType("Input"); + out_arg_ty = kernel->GetOutputDeclType("Out"); + } else { + in_arg_ty = kernel->GetInputDeclType("InputArray"); + out_arg_ty = kernel->GetOutputDeclType("OutArray"); + } VLOG(4) << "------ kernel info -------"; VLOG(4) << "*in_arg_ty(io_copy kernel input):" << *in_arg_ty; diff --git a/lite/core/type_system.h b/lite/core/type_system.h index 3a2ce270cea..557983133f8 100644 --- a/lite/core/type_system.h +++ b/lite/core/type_system.h @@ -212,15 +212,19 @@ static bool PrecisionCompatibleTo(const Type& a, const Type& b) { a.precision() == PRECISION(kAny))); } static bool PrecisionCompatible(const Type& a, const Type& b) { - return a.IsVoid() || b.IsVoid() || // - (a.IsTensor() && b.IsTensor() && (a.precision() == b.precision() || // - b.precision() == PRECISION(kAny) || - a.precision() == PRECISION(kAny))); + return a.IsVoid() || b.IsVoid() || // + (((a.IsTensor() && b.IsTensor()) || + (a.IsTensorList() && b.IsTensorList())) && + (a.precision() == b.precision() || // + b.precision() == PRECISION(kAny) || + a.precision() == PRECISION(kAny))); } static bool DeviceCompatibleTo(const Type& a, const Type& b) { return a.IsVoid() || // - (a.IsTensor() && b.IsTensor() && (a.device() == b.device())); + (((a.IsTensor() && b.IsTensor()) || + (a.IsTensorList() && b.IsTensorList())) && // + (a.device() == b.device())); } // Can type 'a' be passed to 'b' directly. diff --git a/lite/kernels/host/while_compute.cc b/lite/kernels/host/while_compute.cc index ce67e25db85..8184718c3d6 100644 --- a/lite/kernels/host/while_compute.cc +++ b/lite/kernels/host/while_compute.cc @@ -41,17 +41,17 @@ void WhileCompute::Run() { REGISTER_LITE_KERNEL( while, kHost, kAny, kAny, paddle::lite::kernels::host::WhileCompute, def) .BindInput("X", - {LiteType::GetTensorListTy(TARGET(kHost), - PRECISION(kAny), - DATALAYOUT(kAny))}) + {LiteType::GetTensorTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) .BindInput("Condition", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny))}) .BindOutput("Out", - {LiteType::GetTensorListTy(TARGET(kHost), - PRECISION(kAny), - DATALAYOUT(kAny))}) + {LiteType::GetTensorTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) .BindOutput("StepScopes", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny), diff --git a/lite/kernels/xpu/io_copy_compute.cc b/lite/kernels/xpu/io_copy_compute.cc index c8a116db41a..d7ec7e6536d 100644 --- a/lite/kernels/xpu/io_copy_compute.cc +++ b/lite/kernels/xpu/io_copy_compute.cc @@ -27,23 +27,33 @@ namespace xpu { class IoCopyHostToXPUCompute : public KernelLite { public: - void Run() override { - auto& param = Param(); - if (param.x->target() == TARGET(kHost) || - param.x->target() == TARGET(kX86) || - param.x->target() == TARGET(kARM)) { - auto mem_size = param.x->memory_size(); + void IoCopyHostToDevice(const Tensor* x, Tensor* y) { + if (x->target() == TARGET(kHost) || x->target() == TARGET(kX86) || + x->target() == TARGET(kARM)) { + auto mem_size = x->memory_size(); VLOG(4) << "host to xpu, copy size " << mem_size; - auto* data = param.y->mutable_data(TARGET(kXPU), mem_size); + auto* data = y->mutable_data(TARGET(kXPU), mem_size); if (mem_size > 0) { TargetWrapperXPU::MemcpySync( - data, param.x->raw_data(), mem_size, IoDirection::HtoD); + data, x->raw_data(), mem_size, IoDirection::HtoD); } - } else if (param.x->target() == TARGET(kXPU)) { - param.y->ShareDataWith(*(param.x)); + } else if (x->target() == TARGET(kXPU)) { + y->ShareDataWith(*x); } else { LOG(FATAL) << "IoCopyHostToXPU can not handle with the input target: " - << static_cast(param.x->target()); + << lite_api::TargetToStr(x->target()); + } + } + + void Run() override { + auto& param = Param(); + if (param.x != nullptr) { + IoCopyHostToDevice(param.x, param.y); + } + if (param.x_array != nullptr) { + for (size_t i = 0; i < param.x_array->size(); i++) { + IoCopyHostToDevice(&(param.x_array->at(i)), &(param.y_array->at(i))); + } } } @@ -76,19 +86,33 @@ class IoCopyHostToXPUCompute class IoCopyXPUToHostCompute : public KernelLite { public: - void Run() override { - auto& param = Param(); - CHECK(param.x->target() == TARGET(kXPU)); - auto mem_size = param.x->memory_size(); - if (param.y->target() != TARGET(kXPU)) { + void IoCopyDeviceToHost(const Tensor* x, Tensor* y) { + if (x->target() == TARGET(kXPU)) { + auto mem_size = x->memory_size(); VLOG(4) << "xpu to host, copy size " << mem_size; - auto* data = param.y->mutable_data(TARGET(kHost), mem_size); + auto* data = y->mutable_data(TARGET(kHost), mem_size); if (mem_size > 0) { TargetWrapperXPU::MemcpySync( - data, param.x->raw_data(), mem_size, IoDirection::DtoH); + data, x->raw_data(), mem_size, IoDirection::DtoH); } + } else if (x->target() == TARGET(kHost) || x->target() == TARGET(kX86) || + x->target() == TARGET(kARM)) { + y->ShareDataWith(*x); } else { - param.y->ShareDataWith(*(param.x)); + LOG(FATAL) << "IoCopyXPUToHost can not handle with the input target: " + << lite_api::TargetToStr(x->target()); + } + } + + void Run() override { + auto& param = Param(); + if (param.x != nullptr) { + IoCopyDeviceToHost(param.x, param.y); + } + if (param.x_array != nullptr) { + for (size_t i = 0; i < param.x_array->size(); i++) { + IoCopyDeviceToHost(&(param.x_array->at(i)), &(param.y_array->at(i))); + } } } @@ -110,10 +134,18 @@ REGISTER_LITE_KERNEL(io_copy, {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindInput("InputArray", + {LiteType::GetTensorListTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindOutput("OutArray", + {LiteType::GetTensorListTy(TARGET(kXPU), + PRECISION(kAny), + DATALAYOUT(kAny))}) .Finalize(); REGISTER_LITE_KERNEL(io_copy, @@ -126,10 +158,18 @@ REGISTER_LITE_KERNEL(io_copy, {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindInput("InputArray", + {LiteType::GetTensorListTy(TARGET(kXPU), + PRECISION(kAny), + DATALAYOUT(kAny))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindOutput("OutArray", + {LiteType::GetTensorListTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) .Finalize(); REGISTER_LITE_KERNEL(io_copy_once, @@ -142,10 +182,18 @@ REGISTER_LITE_KERNEL(io_copy_once, {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindInput("InputArray", + {LiteType::GetTensorListTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindOutput("OutArray", + {LiteType::GetTensorListTy(TARGET(kXPU), + PRECISION(kAny), + DATALAYOUT(kAny))}) .Finalize(); REGISTER_LITE_KERNEL(io_copy_once, @@ -158,8 +206,16 @@ REGISTER_LITE_KERNEL(io_copy_once, {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindInput("InputArray", + {LiteType::GetTensorListTy(TARGET(kXPU), + PRECISION(kAny), + DATALAYOUT(kAny))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))}) + .BindOutput("OutArray", + {LiteType::GetTensorListTy(TARGET(kHost), + PRECISION(kAny), + DATALAYOUT(kAny))}) .Finalize(); diff --git a/lite/operators/io_copy_op.cc b/lite/operators/io_copy_op.cc index 1160b3bcfe0..003b55c671a 100644 --- a/lite/operators/io_copy_op.cc +++ b/lite/operators/io_copy_op.cc @@ -20,29 +20,59 @@ namespace lite { namespace operators { bool IoCopyOp::CheckShape() const { - CHECK_OR_FALSE(param_.x); - CHECK_OR_FALSE(param_.y); + CHECK(param_.x != nullptr || param_.x_array != nullptr); + if (param_.x != nullptr) { + CHECK(param_.y != nullptr); + } + if (param_.x_array != nullptr) { + CHECK(param_.y_array != nullptr); + } return true; } + bool IoCopyOp::InferShapeImpl() const { - param_.y->Resize(param_.x->dims()); - param_.y->set_lod(param_.x->lod()); - param_.y->set_precision(param_.x->precision()); - param_.y->set_persistable(param_.x->persistable()); + if (param_.x != nullptr) { + param_.y->Resize(param_.x->dims()); + param_.y->set_lod(param_.x->lod()); + param_.y->set_precision(param_.x->precision()); + param_.y->set_persistable(param_.x->persistable()); + } + if (param_.x_array != nullptr) { + param_.y_array->resize(param_.x_array->size()); + for (size_t i = 0; i < param_.x_array->size(); i++) { + param_.y_array->at(i).Resize(param_.x_array->at(i).dims()); + param_.y_array->at(i).set_lod(param_.x_array->at(i).lod()); + param_.y_array->at(i).set_precision(param_.x_array->at(i).precision()); + param_.y_array->at(i).set_persistable( + param_.x_array->at(i).persistable()); + } + } return true; } + bool IoCopyOp::Run() { return OpLite::Run(); } + bool IoCopyOp::AttachImpl(const cpp::OpDesc &opdesc, paddle::lite::Scope *scope) { - auto x = opdesc.Input("Input").front(); - auto out = opdesc.Output("Out").front(); - param_.x = GetTensor(scope, x); - param_.y = GetMutableTensor(scope, out); + if (opdesc.HasInput("Input")) { + param_.x = scope->FindTensor(opdesc.Input("Input").front()); + } + if (opdesc.HasInput("InputArray")) { + param_.x_array = scope->FindTensorList(opdesc.Input("InputArray").front()); + } + if (opdesc.HasOutput("Out")) { + param_.y = scope->FindMutableTensor(opdesc.Output("Out").front()); + } + if (opdesc.HasOutput("OutArray")) { + param_.y_array = + scope->FindMutableTensorList(opdesc.Output("OutArray").front()); + } if (opdesc.HasAttr("process_type")) { param_.process_type = opdesc.GetAttr("process_type"); } return true; } + std::string IoCopyOp::DebugString() const { return "io_copy_op"; } } // namespace operators diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index 8d177d6c5c9..4af420690f3 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -69,8 +69,10 @@ struct FetchParam : ParamBase { // Helper op for lite framework struct IoCopyParam : ParamBase { - const lite::Tensor* x{}; - lite::Tensor* y{}; + const lite::Tensor* x{nullptr}; + const std::vector* x_array{nullptr}; + lite::Tensor* y{nullptr}; + std::vector* y_array{nullptr}; int process_type{0}; };