Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions paddle/fluid/lite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ endfunction()
function (lite_deps TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS ARGS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

set(deps ${lite_deps_DEPS})
Expand Down Expand Up @@ -63,14 +63,35 @@ function (lite_deps TARGET)
endforeach(var)
endif()

if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
foreach(var ${lite_deps_LIGHT_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()

if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
foreach(var ${lite_deps_HVY_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()

set(${TARGET} ${deps} PARENT_SCOPE)

endfunction()

# cc_library with branch support.
# The branches:
# X86_DEPS: works only when LITE_WITH_X86 is ON.
# CUDA_DEPS: LITE_WITH_CUDA
# ARM_DEPS: LITE_WITH_ARM
# PROFILE_DEPS: LITE_WITH_PROFILE
# LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
# HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
function(lite_cc_library TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

set(deps "")
Expand All @@ -79,15 +100,19 @@ function(lite_cc_library TARGET)
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS})
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)

cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
endfunction()

function(lite_cc_binary TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

set(deps "")
Expand All @@ -97,6 +122,8 @@ function(lite_cc_binary TARGET)
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
endfunction()
Expand All @@ -112,7 +139,9 @@ endfunction()
function(lite_cc_test TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS)
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

set(deps "")
Expand All @@ -122,6 +151,8 @@ function(lite_cc_test TARGET)
CUDA_DEPS ${args_CUDA_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
register_test_offline("${TARGET}")
Expand All @@ -137,3 +168,4 @@ add_subdirectory(kernels)
add_subdirectory(model_parser)
add_subdirectory(utils)
add_subdirectory(api)
add_subdirectory(gen_code)
1 change: 1 addition & 0 deletions paddle/fluid/lite/api/cxx_api_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ TEST(CXXApi, save_model) {
predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
valid_places);

LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
predictor.SaveModel(FLAGS_optimized_model);
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
Expand Down
5 changes: 2 additions & 3 deletions paddle/fluid/lite/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc
lite_cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite)
lite_cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite)
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor)
lite_cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor HVY_DEPS framework_proto)
endif()

if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
Expand All @@ -26,8 +26,7 @@ cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
cc_library(cpu_info_lite SRCS cpu_info.cc)
cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite)
cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
cpp_op_desc_lite
${tensor_lite})
cpp_op_desc_lite ${tensor_lite})
cc_library(types_lite SRCS types.cc)
cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)

Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/lite/core/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ class Context<TargetType::kX86> {
new ::paddle::framework::ExecutionContext(*x86_device_context_));
}

// Move constructor: transfers ownership of the wrapped X86 device and
// execution contexts from `ctx` (presumably smart pointers — moved-from
// `ctx` is left empty).
Context(Context&& ctx)
    : x86_device_context_(std::move(ctx.x86_device_context_)),
      x86_execution_context_(std::move(ctx.x86_execution_context_)) {}

// NOTE: InitOnce should only be used by ContextScheduler
void InitOnce() {}

Expand Down
10 changes: 9 additions & 1 deletion paddle/fluid/lite/core/hvy_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#pragma once
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/lite/core/target_wrapper.h"
#include "paddle/fluid/lite/core/tensor.h"

namespace paddle {
Expand Down Expand Up @@ -65,6 +66,14 @@ class TensorHvy : public TensorBase<TensorHvy> {
using DDimT = DDimHvy;
using LoDT = framework::LoD;

template <typename DType, typename DimT, TargetType Target>
void Assign(DType* data, const DimT& dim) {
Resize(dim);
auto* dst = mutable_data<DType>(Target);
CopySync<Target>(dst, data, dim.production() * sizeof(DType),
IoDirection::HtoD);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default copy direction here is IoDirection::HtoD — what if a caller needs Assign to perform an HtoH or DtoH copy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assign is only used for host-to-device initialization.

}

TargetType target() const {
if (platform::is_gpu_place(data_.place())) {
return TARGET(kCUDA);
Expand Down Expand Up @@ -95,7 +104,6 @@ class TensorHvy : public TensorBase<TensorHvy> {
const void* raw_data() const { return data_.raw_data(); }

void Resize(const DDimHvy& dims) {
LOG(INFO) << "dims.size " << dims.size();
data_.Resize(framework::make_ddim(dims.Vectorize()));
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/lite/core/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class KernelBase {
void Torch() {}

protected:
std::unique_ptr<KernelContext> ctx_;
std::unique_ptr<KernelContext> ctx_{nullptr};
mutable operators::param_t param_;
// The corresponding op type.
std::string op_type_{};
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/lite/core/lite_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ class TensorLite : public TensorBase<TensorLite> {

TensorLite() : buffer_(std::make_shared<Buffer>()) {}

// Resizes this tensor to `dim` and synchronously copies `data` (host memory)
// into the tensor's buffer on `Target`. Per the review thread, Assign is only
// meant for host-to-device style initialization, hence the fixed HtoD
// direction.
template <typename DType, typename DimT, TargetType Target>
void Assign(DType *data, const DimT &dim) {
  Resize(dim);
  auto *dst = mutable_data<DType>(Target);
  // Fix: use production() for the element count — `product()` does not exist
  // on the DDim API used elsewhere (see DDimBase and TensorHvy::Assign, which
  // both call production()).
  CopySync<Target>(dst, data, dim.production() * sizeof(DType),
                   IoDirection::HtoD);
}

template <typename T>
const T *data() const {
return static_cast<const T *>(buffer_->data());
Expand Down
14 changes: 11 additions & 3 deletions paddle/fluid/lite/core/op_lite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,23 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
CHECK(!op_type_.empty()) << "op_type_ should be set first";

auto pick_kernel = [&](const Place &place) {
auto ks = KernelRegistry::Global().Create(
(kernel_type.empty() ? op_type_ : kernel_type), place.target,
place.precision, place.layout);
auto ks = KernelRegistry::Global().Create(op_type_, place.target,
place.precision, place.layout);
for (auto &&it : ks) {
AttachKernel(it.get());
kernels.emplace_back(std::move(it));
}
};

if (!kernel_type.empty()) {
Place place;
std::string op_type, alias;
KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
pick_kernel(place);
CHECK(!kernels.empty()) << "no kernel for kernel type " << kernel_type;
return kernels;
}

std::set<Place> place_set;
for (auto place : places) {
place_set.insert(place);
Expand Down
9 changes: 3 additions & 6 deletions paddle/fluid/lite/core/op_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ class KernelRegistry final {
void Register(const std::string &name,
typename KernelRegistryForTarget<Target, Precision,
Layout>::creator_t &&creator) {
VLOG(3) << "register for " << TargetToStr(Target) << ":"
<< PrecisionToStr(Precision) << "//"
<< GetKernelOffset<Target, Precision, Layout>();
// VLOG(3) << "register for " << TargetToStr(Target) << ":"
//<< PrecisionToStr(Precision) << "//"
//<< GetKernelOffset<Target, Precision, Layout>();
using kernel_registor_t =
KernelRegistryForTarget<Target, Precision, Layout>;
auto &varient = registries_[GetKernelOffset<Target, Precision, Layout>()];
Expand Down Expand Up @@ -153,9 +153,6 @@ class KernelRegistor : public lite::Registor<KernelType> {
public:
KernelRegistor(const std::string &op_type, const std::string &alias)
: Registor<KernelType>([=] {
VLOG(3) << "Register kernel " << op_type << " for "
<< TargetToStr(target) << " " << PrecisionToStr(precision)
<< " " << DataLayoutToStr(layout) << " alias " << alias;
KernelRegistry::Global().Register<target, precision, layout>(
op_type, [=]() -> std::unique_ptr<KernelType> {
std::unique_ptr<KernelType> x(new KernelType);
Expand Down
8 changes: 7 additions & 1 deletion paddle/fluid/lite/core/scope.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@
namespace paddle {
namespace lite {

Scope::~Scope() {}
// Releases all child scopes created via NewScope(); each child's destructor
// in turn frees its own children, so the whole subtree is destroyed.
// `delete` on a null pointer is a guaranteed no-op in C++, so the explicit
// null check in the original was redundant and has been removed.
Scope::~Scope() {
  for (auto *kid : kids_) delete kid;
}

Scope &Scope::NewScope() const {
kids_.push_back(new Scope);
Expand Down
41 changes: 40 additions & 1 deletion paddle/fluid/lite/core/target_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ static const std::string& TargetToStr(TargetType target) {
}

static const std::string& PrecisionToStr(PrecisionType precision) {
static const std::string precision2string[] = {"unk", "float", "int8", "any"};
static const std::string precision2string[] = {"unk", "float", "int8_t",
"any"};
auto x = static_cast<int>(precision);
CHECK_LT(x, static_cast<int>(PRECISION(NUM)));
return precision2string[x];
Expand All @@ -76,6 +77,29 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) {
return datalayout2string[x];
}

// Returns the enum-style spelling ("kHost", "kX86", ...) of a TargetType,
// as opposed to TargetToStr which returns the lowercase human-readable name.
static const std::string& TargetRepr(TargetType target) {
  static const std::string kTarget2String[] = {"kUnk", "kHost", "kX86",
                                               "kCUDA", "kAny"};
  auto index = static_cast<int>(target);
  CHECK_LT(index, static_cast<int>(TARGET(NUM)));
  return kTarget2String[index];
}

// Returns the enum-style spelling ("kFloat", "kInt8", ...) of a
// PrecisionType value.
static const std::string& PrecisionRepr(PrecisionType precision) {
  static const std::string kPrecision2String[] = {"kUnk", "kFloat", "kInt8",
                                                  "kAny"};
  auto index = static_cast<int>(precision);
  CHECK_LT(index, static_cast<int>(PRECISION(NUM)));
  return kPrecision2String[index];
}

// Returns the enum-style spelling ("kNCHW", ...) of a DataLayoutType value.
static const std::string& DataLayoutRepr(DataLayoutType layout) {
  static const std::string kLayout2String[] = {"kUnk", "kNCHW", "kAny"};
  auto index = static_cast<int>(layout);
  CHECK_LT(index, static_cast<int>(DATALAYOUT(NUM)));
  return kLayout2String[index];
}

/*
* Place specifies the execution context of a Kernel or input/output for a
* kernel. It is used to make the analysis of the MIR more clear and accurate.
Expand Down Expand Up @@ -228,5 +252,20 @@ class TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t> {
};
#endif // LITE_WITH_CUDA

template <TargetType Target>
void CopySync(void* dst, void* src, size_t size, IoDirection dir) {
switch (Target) {
case TARGET(kX86):
case TARGET(kHost):
case TARGET(kARM):
TargetWrapperX86::MemcpySync(dst, src, size, IoDirection::HtoH);
break;
#ifdef LITE_WITH_CUDA
case TARGET(kCUDA):
TargetWrapperCuda::MemcpySync(dst, src, size, dir);
#endif
}
}

} // namespace lite
} // namespace paddle
30 changes: 19 additions & 11 deletions paddle/fluid/lite/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ class DDimBase {
DDimBase() = default;

explicit DDimBase(const std::vector<int64_t> &x) { self()->ConstructFrom(x); }
value_type operator[](int offset) const { return (*self())[offset]; }
value_type operator[](int offset) const { return (*const_self())[offset]; }
value_type &operator[](int offset) { return (*self())[offset]; }
std::vector<int64_t> Vectorize() const { return self()->Vectorize(); }
size_t size() const { return const_self()->size(); }
bool empty() const { return const_self()->empty(); }
Expand All @@ -73,18 +74,19 @@ class DDimBase {
{Slice(0, col).production(), Slice(col, size()).production()}));
}

friend std::ostream &operator<<(std::ostream &os, const DDimT &dims) {
if (dims.empty()) {
os << "[]";
return os;
// Renders the dims as "{d0,d1,...}" ("{}" when empty).
std::string repr() const {
  std::stringstream ss;
  ss << "{";
  // Use `i + 1 < size()` instead of `i < size() - 1`: size() is unsigned, so
  // the original subtraction underflowed to SIZE_MAX when size() == 0 and the
  // loop read out of bounds for empty dims.
  for (size_t i = 0; i + 1 < size(); i++) {
    ss << (*this)[i] << ",";
  }
  if (!this->empty()) ss << (*this)[size() - 1];
  ss << "}";
  return ss.str();
}

os << "[";
for (size_t i = 0; i < dims.size() - 1; i++) {
os << dims[i] << " ";
}
if (!dims.empty()) os << dims[dims.size() - 1];
os << "]";
// Streams the dims in their repr() form, e.g. "{1,3,224,224}".
friend std::ostream &operator<<(std::ostream &os, const DDimT &dims) {
  return os << dims.repr();
}

Expand All @@ -102,6 +104,12 @@ template <typename TensorT>
class TensorBase {
public:
TensorBase() = default;

// Forwards Assign to the concrete tensor implementation (CRTP dispatch).
// NOTE(review): the derived Assign implementations (TensorLite/TensorHvy)
// take an additional TargetType template parameter that is neither supplied
// nor deducible from this call — confirm the intended call sites for this
// base-class entry point.
template <typename T, typename DimT>
void Assign(T *data, const DimT &dim) {
self()->Assign(data, dim);
}

TargetType target() const { return self()->target(); }

template <typename T>
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/lite/core/variable.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace lite {
class Variable {
public:
template <typename T>
const T& Get() {
const T& Get() const {
return blob_.get<T>();
}

Expand Down
Loading