Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ endif()
# for lite, both server and mobile framework.
option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

比较疑惑 LITE_WITH_ARM、LITE_WITH_LIGHT_WEIGHT_FRAMEWORK、WITH_LITE 这几个之间的关系

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WITH_LITE 控制整个lite 目录是否会编译
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK 控制是否用轻量级框架, server端也可以用轻量级
LITE_WITH_ARM 是控制是否编译 ARM 相关模块,包括kernel等。 ARM 和 X86 以及 CUDA 都是平级的硬件,本质上不做特殊支持

暂时 LITE_WITH_LIGHT_WEIGHT_FRAMEWORK + LITE_WITH_ARM 可以认为是在mobile平台上跑

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LITE_WITH_LIGHT_WEIGHT_FRAMEWORK 控制是否用轻量级框架, server端也可以用轻量级

他指的是框架轻量级?lite本身的目的就是轻量级吧,是不是应该把他们合成一个?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lite 之后有可能会接入训练; 目前已有的 server op 都是基于重的框架,包括 allocator, tensor, scope, platform 等,暂时没法合并。

所以暂时只能 mobile 用轻量级,后面再看看



Expand Down
4 changes: 4 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ if (LITE_WITH_X86)
add_definitions("-DLITE_WITH_X86")
endif()

# Expose the LITE_WITH_ARM CMake switch to C/C++ sources as a preprocessor
# definition, mirroring the LITE_WITH_X86 / LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
# stanzas around it.
if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM")
endif()

if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
add_definitions("-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK")
endif()
2 changes: 1 addition & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ function(raw_cc_test TARGET_NAME)
endif()
endfunction(raw_cc_test)

function(lite_cc_test args)
function(_lite_cc_test args)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
message(STATUS "building lite raw test: ${args}")
raw_cc_test(${args} ${ARGN})
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
namespace paddle {
namespace framework {

OpDuppy op_duppy;
Scope scope_duppy;
RuntimeContext runtime_context_duppy({}, {});

std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
Expand Down
9 changes: 5 additions & 4 deletions paddle/fluid/framework/operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,10 @@ class OpDuppy : public OperatorBase {
void RunImpl(const Scope& scope,
const platform::Place& place) const override {}
};
OpDuppy op_duppy;
Scope scope_duppy;
RuntimeContext runtime_context_duppy({}, {});

extern OpDuppy op_duppy;
extern Scope scope_duppy;
extern RuntimeContext runtime_context_duppy;

class ExecutionContext {
public:
Expand All @@ -255,7 +256,7 @@ class ExecutionContext {
ctx_(ctx),
kernel_configs_(configs) {}

ExecutionContext(const platform::DeviceContext& device_context)
explicit ExecutionContext(const platform::DeviceContext& device_context)
: op_(op_duppy),
scope_(scope_duppy),
device_context_(device_context),
Expand Down
67 changes: 63 additions & 4 deletions paddle/fluid/lite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ if (NOT WITH_LITE)
endif()

message(WARNING "Lite enabled!")
message(STATUS "LIGHT_FRAMEWORK: ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA: ${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86: ${LITE_WITH_X86}")
message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}")
message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")

set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")

Expand All @@ -29,6 +30,65 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
)
endfunction()

# lite_deps(<out-var> DEPS ... X86_DEPS ... CUDA_DEPS ... ARM_DEPS ...)
#
# Assembles a dependency list for a lite target: the unconditional DEPS plus
# whichever hardware-specific groups (X86/CUDA/ARM) are enabled by the
# corresponding LITE_WITH_* options. The result is stored in <out-var> in the
# caller's scope.
function (lite_deps DEPS)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS)
  cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Accumulate into a local list and publish to the caller exactly once at the
  # end. NOTE: `set(... PARENT_SCOPE)` does NOT update the variable in the
  # current scope, so the original per-iteration
  # `set(${DEPS} ${${DEPS}} ${var} PARENT_SCOPE)` kept re-reading a stale
  # value and silently dropped all but the last dependency of each group.
  set(deps ${lite_deps_DEPS})

  if(LITE_WITH_X86)
    foreach(var ${lite_deps_X86_DEPS})
      list(APPEND deps ${var})
    endforeach()
  endif()

  if(LITE_WITH_CUDA)
    foreach(var ${lite_deps_CUDA_DEPS})
      list(APPEND deps ${var})
    endforeach()
  endif()

  if(LITE_WITH_ARM)
    foreach(var ${lite_deps_ARM_DEPS})
      list(APPEND deps ${var})
    endforeach()
  endif()

  set(${DEPS} ${deps} PARENT_SCOPE)
endfunction()

# lite_cc_library(<target> SRCS ... DEPS ... X86_DEPS ... CUDA_DEPS ... ARM_DEPS ...)
#
# Thin wrapper around cc_library() that folds the hardware-conditional
# dependency groups into a single DEPS list via lite_deps().
function(lite_cc_library TARGET)
  cmake_parse_arguments(LCL "" "" "SRCS;DEPS;X86_DEPS;CUDA_DEPS;ARM_DEPS" ${ARGN})

  set(all_deps "")
  lite_deps(all_deps
          DEPS ${LCL_DEPS}
          X86_DEPS ${LCL_X86_DEPS}
          CUDA_DEPS ${LCL_CUDA_DEPS}
          ARM_DEPS ${LCL_ARM_DEPS})

  cc_library(${TARGET} SRCS ${LCL_SRCS} DEPS ${all_deps})
endfunction()

# lite_cc_test(<target> SRCS ... DEPS ... X86_DEPS ... CUDA_DEPS ... ARM_DEPS ...)
#
# Thin wrapper around _lite_cc_test() that folds the hardware-conditional
# dependency groups into a single DEPS list via lite_deps().
function(lite_cc_test TARGET)
  cmake_parse_arguments(LCT "" "" "SRCS;DEPS;X86_DEPS;CUDA_DEPS;ARM_DEPS" ${ARGN})

  set(all_deps "")
  lite_deps(all_deps
          DEPS ${LCT_DEPS}
          X86_DEPS ${LCT_X86_DEPS}
          CUDA_DEPS ${LCT_CUDA_DEPS}
          ARM_DEPS ${LCT_ARM_DEPS})

  _lite_cc_test(${TARGET} SRCS ${LCT_SRCS} DEPS ${all_deps})
endfunction()

add_subdirectory(core)
add_subdirectory(x86)
Expand All @@ -39,4 +99,3 @@ add_subdirectory(kernels)
add_subdirectory(model_parser)
add_subdirectory(utils)
add_subdirectory(api)

34 changes: 12 additions & 22 deletions paddle/fluid/lite/api/cxx_api_bin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,8 @@ namespace lite {

void Run(const char* model_dir) {
lite::ExecutorLite predictor;
// #ifndef LITE_WITH_CUDA
// std::vector<Place> valid_places({Place{TARGET(kHost),
// PRECISION(kFloat)}});
// #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
// #else
// std::vector<Place> valid_places({
// Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
// Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
// Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
// });
// #endif

std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});

predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
Expand All @@ -52,16 +38,14 @@ void Run(const char* model_dir) {
data[i] = i;
}

LOG(INFO) << "input " << *input_tensor;

predictor.Run();

auto* out = predictor.GetOutput(0);
LOG(INFO) << out << " memory size " << out->data_size();
LOG(INFO) << "out " << out->data<float>()[0];
LOG(INFO) << "out " << out->data<float>()[1];
LOG(INFO) << "dims " << out->dims();
LOG(INFO) << "out " << *out;
LOG(INFO) << "out data size: " << out->data_size();
}

} // namespace lite
Expand All @@ -79,12 +63,18 @@ USE_LITE_OP(fc);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
// USE_LITE_OP(io_copy);
USE_LITE_OP(io_copy);

USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);

#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
#endif // LITE_WITH_ARM

#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
Expand Down
12 changes: 7 additions & 5 deletions paddle/fluid/lite/core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest)
cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite target_wrapper_host)
cc_library(target_wrapper_lite SRCS target_wrapper.cc)
if (WITH_TESTING)
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest)
endif()
cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite)
lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc DEPS target_wrapper_host X86_DEPS target_wrapper_x86 CUDA_DEPS target_wrapper_cuda)
cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite)
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor)
Expand Down Expand Up @@ -40,10 +42,10 @@ cc_library(program_fake_utils SRCS program_fake_utils.cc DEPS mir_ssa_graph
)

lite_cc_test(test_scope_lite SRCS scope_test.cc DEPS scope_lite)
lite_cc_test(test_kernel_lite SRCS kernel_test.cc DEPS kernel_lite target_wrapper_x86)
lite_cc_test(test_kernel_lite SRCS kernel_test.cc DEPS kernel_lite target_wrapper_lite)
lite_cc_test(test_op_lite SRCS op_lite_test.cc DEPS op_lite)
lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor)
lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite)
#lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite)
lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite)
lite_cc_test(test_memory_lite SRCS memory_test.cc DEPS memory_lite)
62 changes: 61 additions & 1 deletion paddle/fluid/lite/core/memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,65 @@
#include "paddle/fluid/lite/core/memory.h"

namespace paddle {
namespace lite {} // namespace lite
namespace lite {

// Allocate `size` bytes on the given target. Host-addressable targets
// (kHost, kX86, kARM) all share the host allocator; kCUDA uses its own
// TargetWrapper specialization. Any other target is a fatal error.
void* TargetMalloc(TargetType target, size_t size) {
  if (target == TargetType::kHost || target == TargetType::kX86 ||
      target == TargetType::kARM) {
    return TargetWrapper<TARGET(kHost)>::Malloc(size);
  }
#ifdef LITE_WITH_CUDA
  if (target == TargetType::kCUDA) {
    return TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Malloc(
        size);
  }
#endif  // LITE_WITH_CUDA
  LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
  return nullptr;
}

// Free a buffer previously obtained from TargetMalloc for the same target.
// Host-addressable targets (kHost, kX86, kARM) release through the host
// allocator; kCUDA must release through the CUDA wrapper.
void TargetFree(TargetType target, void* data) {
  switch (target) {
    case TargetType::kHost:
    case TargetType::kX86:
    case TargetType::kARM:
      TargetFree wrapper // placeholder removed
      break;
#ifdef LITE_WITH_CUDA
    case TargetType::kCUDA:
      break;
#endif  // LITE_WITH_CUDA
    default:
      LOG(FATAL) << "Unknown type";
  }
}

// Copy `size` bytes between two buffers that both live on `target`.
// Host-addressable targets (kHost, kX86, kARM) use the host wrapper; kCUDA
// uses the CUDA wrapper. Direction is device-to-device in both cases.
void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
  if (target == TargetType::kHost || target == TargetType::kX86 ||
      target == TargetType::kARM) {
    TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
                                             IoDirection::DtoD);
    return;
  }
#ifdef LITE_WITH_CUDA
  if (target == TargetType::kCUDA) {
    TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
                                             IoDirection::DtoD);
    return;
  }
#endif
  LOG(FATAL) << "unsupported type";
}

} // namespace lite
} // namespace paddle
57 changes: 8 additions & 49 deletions paddle/fluid/lite/core/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,57 +18,16 @@
namespace paddle {
namespace lite {

static void* TargetMalloc(TargetType target, size_t size) {
void* data{nullptr};
switch (target) {
case TargetType::kHost:
#ifdef LITE_WITH_X86
case TargetType::kX86:
#endif
data = TargetWrapper<TARGET(kHost)>::Malloc(size);
break;
#ifdef LITE_WITH_CUDA
case TargetType::kCUDA:
data =
TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Malloc(size);
break;
#endif // LITE_WITH_CUDA
default:
LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
}
return data;
}

static void TargetFree(TargetType target, void* data) {
switch (static_cast<int>(target)) {
case static_cast<int>(TargetType::kX86):
TargetWrapper<TARGET(kX86)>::Free(data);
break;
case static_cast<int>(TargetType::kCUDA):
TargetWrapper<TARGET(kX86)>::Free(data);
break;
default:
LOG(FATAL) << "Unknown type";
}
}
// Malloc memory for a specific Target. All the targets should be an element in
// the `switch` here.
void* TargetMalloc(TargetType target, size_t size);

static void TargetCopy(TargetType target, void* dst, const void* src,
size_t size) {
switch (target) {
case TargetType::kX86:
case TargetType::kHost:
TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
// Free memory for a specific Target. All the targets should be an element in
// the `switch` here.
void TargetFree(TargetType target, void* data);

case TargetType::kCUDA:
TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
default:
LOG(FATAL) << "unsupported type";
}
}
// Copy a buffer from host to another target.
void TargetCopy(TargetType target, void* dst, const void* src, size_t size);

// Memory buffer manager.
class Buffer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/lite/kernels/host/relu_compute.h"
#include "paddle/fluid/lite/core/memory.h"
#include <gtest/gtest.h>

namespace paddle {
namespace lite {

TEST(memory, test) {
  // A host (x86) allocation must return a non-null buffer that TargetFree
  // can release without error.
  auto* buf = TargetMalloc(TARGET(kX86), 10);
  ASSERT_TRUE(buf);
  TargetFree(TARGET(kX86), buf);

#ifdef LITE_WITH_CUDA
  auto* buf_cuda = TargetMalloc(TARGET(kCUDA), 10);
  ASSERT_TRUE(buf_cuda);
  // Fix: the macro is TARGET(...); the original `Target(kCUDA)` is not a
  // declared name and fails to compile when LITE_WITH_CUDA is enabled.
  TargetFree(TARGET(kCUDA), buf_cuda);
#endif
}

} // namespace lite
} // namespace paddle
Loading