Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,28 @@ void ControlFlowOpSharedInputsAndOutputsPlaceSyncPass::Apply(
var_name, var_node->AsArg().type));
}
}

// sync input var
for (auto& sub_op_node :
(*graphs_)[sub_block_idx]->StmtTopologicalOrder()) {
if (!sub_op_node->IsStmt()) continue;
for (auto* sub_var_node : sub_op_node->inlinks) {
CheckAndSyncTypeOfVarNode(sub_var_node, ref_var_types);
}
for (auto* sub_var_node : sub_op_node->outlinks) {
CheckAndSyncTypeOfVarNode(sub_var_node, ref_var_types);
auto& var_name = sub_var_node->AsArg().name;
if (!ref_var_types.count(var_name)) {
ref_var_types.insert(std::pair<std::string, const Type*>(
var_name, sub_var_node->AsArg().type));
}
}
}

// sync output var
for (auto* var_node : op_node->outlinks) {
CHECK(var_node->IsArg());
CheckAndSyncTypeOfVarNode(var_node, ref_var_types);
}
}
}

Expand Down
14 changes: 12 additions & 2 deletions lite/core/mir/io_copy_kernel_pick_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class IoCopyKernelPickPass : public StmtPass {
CHECK(!kernels.empty()) << "No valid kernels found for IoCopy Op";
const auto* inty = node.inlinks.front()->AsArg().type;
const auto* outy = node.outlinks.front()->AsArg().type;
CHECK((inty->IsTensor() && outy->IsTensor()) ||
(inty->IsTensorList() && outy->IsTensorList()));
LOG(INFO) << "input type " << *inty;
LOG(INFO) << "output type " << *outy;

Expand All @@ -42,10 +44,18 @@ class IoCopyKernelPickPass : public StmtPass {
CHECK_EQ(node.inlinks.size(), 1UL);
CHECK_EQ(node.outlinks.size(), 1UL);

const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
const Type* in_arg_ty = nullptr;
const Type* out_arg_ty = nullptr;
if (inty->IsTensor()) {
in_arg_ty = kernel->GetInputDeclType("Input");
out_arg_ty = kernel->GetOutputDeclType("Out");
} else {
in_arg_ty = kernel->GetInputDeclType("InputArray");
out_arg_ty = kernel->GetOutputDeclType("OutArray");
}
LOG(INFO) << "checking kernel candidate " << *in_arg_ty << "->"
<< *out_arg_ty;

if (TargetCompatibleTo(*inty, *in_arg_ty)) {
// Both the input and output type matches, remove other kernels
// directly.
Expand Down
33 changes: 27 additions & 6 deletions lite/core/mir/type_target_cast_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,17 @@ void TypeTargetTransformPass::AddIoCopyInst(
// to.target()
// The precision and layout should be equal to from.precision(),
// from.layout()
io_copy_output_arg->AsArg().type =
LiteType::GetTensorTy(to.target(), from.precision(), from.layout());
bool is_tensor = from.IsTensor();
if (!is_tensor) {
CHECK(from.IsTensorList()) << "only support tensor or tensor_array.";
}
if (is_tensor) {
io_copy_output_arg->AsArg().type =
LiteType::GetTensorTy(to.target(), from.precision(), from.layout());
} else {
io_copy_output_arg->AsArg().type = LiteType::GetTensorListTy(
to.target(), from.precision(), from.layout());
}
auto* io_copy_inst = graph->NewInstructNode();

bool in_persist = in->AsArg().is_weight || in->AsArg().is_persist;
Expand All @@ -141,17 +150,29 @@ void TypeTargetTransformPass::AddIoCopyInst(
// Create IoCopy Instruction.
cpp::OpDesc op_desc;
op_desc.SetType(io_copy_type);
op_desc.SetInput("Input", {in->AsArg().name});
op_desc.SetOutput("Out", {io_copy_output_name});
if (is_tensor) {
op_desc.SetInput("Input", {in->AsArg().name});
op_desc.SetOutput("Out", {io_copy_output_name});
} else {
op_desc.SetInput("InputArray", {in->AsArg().name});
op_desc.SetOutput("OutArray", {io_copy_output_name});
}

io_copy_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
auto kernels = io_copy_op->CreateKernels(valid_places);
// fix(MyPandaShaoxiang): select kernel that input_dcl_type same as in.type
bool is_found = false;
std::vector<std::unique_ptr<KernelBase>> selected_kernels;
for (auto& kernel : kernels) {
const Type* in_arg_ty = kernel->GetInputDeclType("Input");
const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
const Type* in_arg_ty = nullptr;
const Type* out_arg_ty = nullptr;
if (is_tensor) {
in_arg_ty = kernel->GetInputDeclType("Input");
out_arg_ty = kernel->GetOutputDeclType("Out");
} else {
in_arg_ty = kernel->GetInputDeclType("InputArray");
out_arg_ty = kernel->GetOutputDeclType("OutArray");
}

VLOG(4) << "------ kernel info -------";
VLOG(4) << "*in_arg_ty(io_copy kernel input):" << *in_arg_ty;
Expand Down
14 changes: 9 additions & 5 deletions lite/core/type_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,19 @@ static bool PrecisionCompatibleTo(const Type& a, const Type& b) {
a.precision() == PRECISION(kAny)));
}
// Returns true when the precisions of `a` and `b` are compatible:
//  - either type is void, or
//  - both are tensors, or both are tensor lists, and their precisions are
//    equal or at least one of them is PRECISION(kAny).
// A tensor paired with a tensor list is never compatible.
static bool PrecisionCompatible(const Type& a, const Type& b) {
  if (a.IsVoid() || b.IsVoid()) {
    return true;
  }
  const bool same_kind = (a.IsTensor() && b.IsTensor()) ||
                         (a.IsTensorList() && b.IsTensorList());
  if (!same_kind) {
    return false;
  }
  return a.precision() == b.precision() ||  //
         b.precision() == PRECISION(kAny) ||
         a.precision() == PRECISION(kAny);
}

// Returns true when `a` is void, or when `a` and `b` are of the same kind
// (both tensors or both tensor lists) and report the same device().
// Only `a` is tested for void, so the check is directional.
static bool DeviceCompatibleTo(const Type& a, const Type& b) {
  if (a.IsVoid()) {
    return true;
  }
  const bool same_kind = (a.IsTensor() && b.IsTensor()) ||
                         (a.IsTensorList() && b.IsTensorList());
  return same_kind && (a.device() == b.device());
}

// Can type 'a' be passed to 'b' directly.
Expand Down
12 changes: 6 additions & 6 deletions lite/kernels/host/while_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@ void WhileCompute::Run() {
REGISTER_LITE_KERNEL(
while, kHost, kAny, kAny, paddle::lite::kernels::host::WhileCompute, def)
.BindInput("X",
{LiteType::GetTensorListTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("Condition",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorListTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("StepScopes",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
Expand Down
94 changes: 75 additions & 19 deletions lite/kernels/xpu/io_copy_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,33 @@ namespace xpu {
class IoCopyHostToXPUCompute
: public KernelLite<TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
void Run() override {
auto& param = Param<operators::IoCopyParam>();
if (param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kX86) ||
param.x->target() == TARGET(kARM)) {
auto mem_size = param.x->memory_size();
void IoCopyHostToDevice(const Tensor* x, Tensor* y) {
if (x->target() == TARGET(kHost) || x->target() == TARGET(kX86) ||
x->target() == TARGET(kARM)) {
auto mem_size = x->memory_size();
VLOG(4) << "host to xpu, copy size " << mem_size;
auto* data = param.y->mutable_data(TARGET(kXPU), mem_size);
auto* data = y->mutable_data(TARGET(kXPU), mem_size);
if (mem_size > 0) {
TargetWrapperXPU::MemcpySync(
data, param.x->raw_data(), mem_size, IoDirection::HtoD);
data, x->raw_data(), mem_size, IoDirection::HtoD);
}
} else if (param.x->target() == TARGET(kXPU)) {
param.y->ShareDataWith(*(param.x));
} else if (x->target() == TARGET(kXPU)) {
y->ShareDataWith(*x);
} else {
LOG(FATAL) << "IoCopyHostToXPU can not handle with the input target: "
<< static_cast<int>(param.x->target());
<< lite_api::TargetToStr(x->target());
}
}

// Kernel entry point. Copies the single tensor pair (x -> y) when x is set,
// then copies every element of x_array into the element of y_array at the
// same index when x_array is set. Both inputs may be present.
void Run() override {
  auto& param = Param<operators::IoCopyParam>();
  if (param.x != nullptr) {
    IoCopyHostToDevice(param.x, param.y);
  }
  if (param.x_array == nullptr) {
    return;
  }
  for (size_t idx = 0; idx < param.x_array->size(); idx++) {
    IoCopyHostToDevice(&(param.x_array->at(idx)), &(param.y_array->at(idx)));
  }
}

Expand Down Expand Up @@ -76,19 +86,33 @@ class IoCopyHostToXPUCompute
class IoCopyXPUToHostCompute
: public KernelLite<TARGET(kXPU), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
void Run() override {
auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kXPU));
auto mem_size = param.x->memory_size();
if (param.y->target() != TARGET(kXPU)) {
void IoCopyDeviceToHost(const Tensor* x, Tensor* y) {
if (x->target() == TARGET(kXPU)) {
auto mem_size = x->memory_size();
VLOG(4) << "xpu to host, copy size " << mem_size;
auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
auto* data = y->mutable_data(TARGET(kHost), mem_size);
if (mem_size > 0) {
TargetWrapperXPU::MemcpySync(
data, param.x->raw_data(), mem_size, IoDirection::DtoH);
data, x->raw_data(), mem_size, IoDirection::DtoH);
}
} else if (x->target() == TARGET(kHost) || x->target() == TARGET(kX86) ||
x->target() == TARGET(kARM)) {
y->ShareDataWith(*x);
} else {
param.y->ShareDataWith(*(param.x));
LOG(FATAL) << "IoCopyXPUToHost can not handle with the input target: "
<< lite_api::TargetToStr(x->target());
}
}

// Kernel entry point. Copies the single tensor pair (x -> y) when x is set,
// then copies every element of x_array into the element of y_array at the
// same index when x_array is set. Both inputs may be present.
void Run() override {
  auto& param = Param<operators::IoCopyParam>();
  if (param.x != nullptr) {
    IoCopyDeviceToHost(param.x, param.y);
  }
  if (param.x_array == nullptr) {
    return;
  }
  for (size_t idx = 0; idx < param.x_array->size(); idx++) {
    IoCopyDeviceToHost(&(param.x_array->at(idx)), &(param.y_array->at(idx)));
  }
}

Expand All @@ -110,10 +134,18 @@ REGISTER_LITE_KERNEL(io_copy,
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("InputArray",
{LiteType::GetTensorListTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("OutArray",
{LiteType::GetTensorListTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();

REGISTER_LITE_KERNEL(io_copy,
Expand All @@ -126,10 +158,18 @@ REGISTER_LITE_KERNEL(io_copy,
{LiteType::GetTensorTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("InputArray",
{LiteType::GetTensorListTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("OutArray",
{LiteType::GetTensorListTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();

REGISTER_LITE_KERNEL(io_copy_once,
Expand All @@ -142,10 +182,18 @@ REGISTER_LITE_KERNEL(io_copy_once,
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("InputArray",
{LiteType::GetTensorListTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("OutArray",
{LiteType::GetTensorListTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();

REGISTER_LITE_KERNEL(io_copy_once,
Expand All @@ -158,8 +206,16 @@ REGISTER_LITE_KERNEL(io_copy_once,
{LiteType::GetTensorTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("InputArray",
{LiteType::GetTensorListTy(TARGET(kXPU),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("OutArray",
{LiteType::GetTensorListTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
50 changes: 40 additions & 10 deletions lite/operators/io_copy_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,59 @@ namespace lite {
namespace operators {

// Validates that the op is wired up: at least one of the tensor input (x) or
// the tensor-array input (x_array) must be set, and each input that is set
// must have its matching output (y / y_array) set as well.
// Violations trip CHECK; the function returns true otherwise.
bool IoCopyOp::CheckShape() const {
  CHECK(param_.x != nullptr || param_.x_array != nullptr);
  if (param_.x != nullptr) {
    CHECK(param_.y != nullptr);
  }
  if (param_.x_array != nullptr) {
    CHECK(param_.y_array != nullptr);
  }
  return true;
}

// Mirrors input metadata onto the outputs. For the single-tensor path, y
// takes x's dims, LoD, precision and persistable flag. For the array path,
// y_array is resized to x_array's length and each element receives the same
// four properties from the element at the same index. Each path runs only
// when its input pointer is non-null. Always returns true.
bool IoCopyOp::InferShapeImpl() const {
  if (param_.x != nullptr) {
    param_.y->Resize(param_.x->dims());
    param_.y->set_lod(param_.x->lod());
    param_.y->set_precision(param_.x->precision());
    param_.y->set_persistable(param_.x->persistable());
  }
  if (param_.x_array != nullptr) {
    param_.y_array->resize(param_.x_array->size());
    for (size_t i = 0; i < param_.x_array->size(); i++) {
      const auto& src = param_.x_array->at(i);
      auto& dst = param_.y_array->at(i);
      dst.Resize(src.dims());
      dst.set_lod(src.lod());
      dst.set_precision(src.precision());
      dst.set_persistable(src.persistable());
    }
  }
  return true;
}

// Forwards to the base-class OpLite::Run() and returns its result.
bool IoCopyOp::Run() {
  return OpLite::Run();
}

// Binds the op's parameters from `opdesc`, resolving variable names in
// `scope`. Every slot is optional and is only looked up when the description
// declares it:
//   "Input"      -> param_.x        (tensor)
//   "InputArray" -> param_.x_array  (tensor list)
//   "Out"        -> param_.y        (mutable tensor)
//   "OutArray"   -> param_.y_array  (mutable tensor list)
// The optional int attribute "process_type" is copied when present.
// Only the first name of each slot is used. Always returns true.
bool IoCopyOp::AttachImpl(const cpp::OpDesc &opdesc,
                          paddle::lite::Scope *scope) {
  if (opdesc.HasInput("Input")) {
    param_.x = scope->FindTensor(opdesc.Input("Input").front());
  }
  if (opdesc.HasInput("InputArray")) {
    param_.x_array = scope->FindTensorList(opdesc.Input("InputArray").front());
  }
  if (opdesc.HasOutput("Out")) {
    param_.y = scope->FindMutableTensor(opdesc.Output("Out").front());
  }
  if (opdesc.HasOutput("OutArray")) {
    param_.y_array =
        scope->FindMutableTensorList(opdesc.Output("OutArray").front());
  }
  if (opdesc.HasAttr("process_type")) {
    param_.process_type = opdesc.GetAttr<int>("process_type");
  }
  return true;
}

// Returns the fixed identifier string for this operator.
std::string IoCopyOp::DebugString() const {
  return "io_copy_op";
}

} // namespace operators
Expand Down
Loading