Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ endif()
# for lite, both server and mobile framework.
option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

比较疑惑 LITE_WITH_ARM、LITE_WITH_LIGHT_WEIGHT_FRAMEWORK、WITH_LITE 这几个之间的关系

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WITH_LITE 控制整个lite 目录是否会编译
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK 控制是否用轻量级框架, server端也可以用轻量级
LITE_WITH_ARM 是控制是否编译 ARM 相关模块,包括kernel等。 ARM 和 X86 以及 CUDA 都是平级的硬件,本质上不做特殊支持

暂时 LITE_WITH_LIGHT_WEIGHT_FRAMEWORK + LITE_WITH_ARM 可以认为是在mobile平台上跑

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LITE_WITH_LIGHT_WEIGHT_FRAMEWORK 控制是否用轻量级框架, server端也可以用轻量级

他指的是框架轻量级?lite本身的目的就是轻量级吧,是不是应该把他们合成一个?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lite 之后有可能会接入训练; 目前已有的 server op 都是基于重的框架,包括 allocator, tensor, scope, platform 等,短暂没法合并。

所以暂时只能 mobile 用轻量级,后面再看看



Expand Down
4 changes: 4 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ if (LITE_WITH_X86)
add_definitions("-DLITE_WITH_X86")
endif()

if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM")
endif()

if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
add_definitions("-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK")
endif()
2 changes: 1 addition & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ function(raw_cc_test TARGET_NAME)
endif()
endfunction(raw_cc_test)

function(lite_cc_test args)
function(_lite_cc_test args)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
message(STATUS "building lite raw test: ${args}")
raw_cc_test(${args} ${ARGN})
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
namespace paddle {
namespace framework {

OpDuppy op_duppy;
Scope scope_duppy;
RuntimeContext runtime_context_duppy({}, {});

std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
Expand Down
9 changes: 5 additions & 4 deletions paddle/fluid/framework/operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,10 @@ class OpDuppy : public OperatorBase {
void RunImpl(const Scope& scope,
const platform::Place& place) const override {}
};
OpDuppy op_duppy;
Scope scope_duppy;
RuntimeContext runtime_context_duppy({}, {});

extern OpDuppy op_duppy;
extern Scope scope_duppy;
extern RuntimeContext runtime_context_duppy;

class ExecutionContext {
public:
Expand All @@ -255,7 +256,7 @@ class ExecutionContext {
ctx_(ctx),
kernel_configs_(configs) {}

ExecutionContext(const platform::DeviceContext& device_context)
explicit ExecutionContext(const platform::DeviceContext& device_context)
: op_(op_duppy),
scope_(scope_duppy),
device_context_(device_context),
Expand Down
60 changes: 56 additions & 4 deletions paddle/fluid/lite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ if (NOT WITH_LITE)
endif()

message(WARNING "Lite enabled!")
message(STATUS "LIGHT_FRAMEWORK: ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA: ${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86: ${LITE_WITH_X86}")
message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}")
message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")

set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")

Expand All @@ -29,6 +30,58 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
)
endfunction()

# Builds the dependency list for a lite target and returns it to the caller.
#
# Usage:
#   lite_deps(<out_var> [DEPS ...] [X86_DEPS ...] [CUDA_DEPS ...] [ARM_DEPS ...])
#
# <out_var> (the DEPS parameter) names a variable in the caller's scope. It
# receives the common DEPS followed by every platform-specific group whose
# matching LITE_WITH_X86 / LITE_WITH_CUDA / LITE_WITH_ARM option is enabled.
function(lite_deps DEPS)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS)
  cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Accumulate into a local list. set(... PARENT_SCOPE) does not change the
  # value visible inside this function, so appending via ${${DEPS}} after a
  # PARENT_SCOPE write would drop everything but the last entry.
  set(lite_deps_result "")
  list(APPEND lite_deps_result ${lite_deps_DEPS})

  # Only pull in platform-specific dependencies for enabled platforms.
  if(LITE_WITH_X86)
    list(APPEND lite_deps_result ${lite_deps_X86_DEPS})
  endif()

  if(LITE_WITH_CUDA)
    list(APPEND lite_deps_result ${lite_deps_CUDA_DEPS})
  endif()

  if(LITE_WITH_ARM)
    list(APPEND lite_deps_result ${lite_deps_ARM_DEPS})
  endif()

  # Export the complete list exactly once.
  set(${DEPS} "${lite_deps_result}" PARENT_SCOPE)
endfunction()

# Declares a lite library.
#
# Usage:
#   lite_cc_library(<target> [SRCS ...] [DEPS ...]
#                   [X86_DEPS ...] [CUDA_DEPS ...] [ARM_DEPS ...])
#
# The dependency groups are merged by lite_deps() and the result is handed to
# cc_library() together with the sources.
function(lite_cc_library TARGET)
  cmake_parse_arguments(args "" "" "SRCS;DEPS;X86_DEPS;CUDA_DEPS;ARM_DEPS" ${ARGN})

  # Start from an empty list; lite_deps() fills it in this scope.
  set(resolved_deps "")
  lite_deps(resolved_deps
            DEPS ${args_DEPS}
            X86_DEPS ${args_X86_DEPS}
            CUDA_DEPS ${args_CUDA_DEPS}
            ARM_DEPS ${args_ARM_DEPS})

  cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${resolved_deps})
endfunction()

# Declares a lite test.
#
# Usage:
#   lite_cc_test(<target> [SRCS ...] [DEPS ...]
#                [X86_DEPS ...] [CUDA_DEPS ...] [ARM_DEPS ...])
#
# The dependency groups are merged by lite_deps() and the result is forwarded
# to _lite_cc_test() together with the sources.
function(lite_cc_test TARGET)
  cmake_parse_arguments(args "" "" "SRCS;DEPS;X86_DEPS;CUDA_DEPS;ARM_DEPS" ${ARGN})

  # Start from an empty list; lite_deps() fills it in this scope.
  set(resolved_deps "")
  lite_deps(resolved_deps
            DEPS ${args_DEPS}
            X86_DEPS ${args_X86_DEPS}
            CUDA_DEPS ${args_CUDA_DEPS}
            ARM_DEPS ${args_ARM_DEPS})

  _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${resolved_deps})
endfunction()

add_subdirectory(core)
add_subdirectory(x86)
Expand All @@ -39,4 +92,3 @@ add_subdirectory(kernels)
add_subdirectory(model_parser)
add_subdirectory(utils)
add_subdirectory(api)

34 changes: 12 additions & 22 deletions paddle/fluid/lite/api/cxx_api_bin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,8 @@ namespace lite {

void Run(const char* model_dir) {
lite::ExecutorLite predictor;
// #ifndef LITE_WITH_CUDA
// std::vector<Place> valid_places({Place{TARGET(kHost),
// PRECISION(kFloat)}});
// #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
// #else
// std::vector<Place> valid_places({
// Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
// Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
// Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
// Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
// });
// #endif

std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});

predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
Expand All @@ -52,16 +38,14 @@ void Run(const char* model_dir) {
data[i] = i;
}

LOG(INFO) << "input " << *input_tensor;

predictor.Run();

auto* out = predictor.GetOutput(0);
LOG(INFO) << out << " memory size " << out->data_size();
LOG(INFO) << "out " << out->data<float>()[0];
LOG(INFO) << "out " << out->data<float>()[1];
LOG(INFO) << "dims " << out->dims();
LOG(INFO) << "out " << *out;
LOG(INFO) << "out data size: " << out->data_size();
}

} // namespace lite
Expand All @@ -79,12 +63,18 @@ USE_LITE_OP(fc);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
// USE_LITE_OP(io_copy);
USE_LITE_OP(io_copy);

USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);

#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
#endif // LITE_WITH_ARM

#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
Expand Down
6 changes: 4 additions & 2 deletions paddle/fluid/lite/core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest)
if (WITH_TESTING)
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest)
endif()
cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite target_wrapper_host)
cc_library(target_wrapper_lite SRCS target_wrapper.cc)
cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite)
Expand Down Expand Up @@ -46,4 +48,4 @@ lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor)
lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite)
#lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite)
lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite)
lite_cc_test(test_memory_lite SRCS memory_test.cc DEPS memory_lite)
62 changes: 61 additions & 1 deletion paddle/fluid/lite/core/memory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,65 @@
#include "paddle/fluid/lite/core/memory.h"

namespace paddle {
namespace lite {} // namespace lite
namespace lite {

// Allocates `size` bytes for `target` and returns the pointer.
// kHost, kX86 and kARM all go through the host TargetWrapper; kCUDA is only
// handled when LITE_WITH_CUDA is defined. Any other target is reported with
// LOG(FATAL).
void* TargetMalloc(TargetType target, size_t size) {
  const bool uses_host_allocator = target == TargetType::kHost ||
                                   target == TargetType::kX86 ||
                                   target == TargetType::kARM;
  if (uses_host_allocator) {
    return TargetWrapper<TARGET(kHost)>::Malloc(size);
  }

#ifdef LITE_WITH_CUDA
  if (target == TargetType::kCUDA) {
    return TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Malloc(
        size);
  }
#endif  // LITE_WITH_CUDA

  LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
  return nullptr;
}

// Releases memory previously obtained from TargetMalloc for the same `target`.
// kHost, kX86 and kARM all go through the host TargetWrapper; kCUDA is only
// handled when LITE_WITH_CUDA is defined. Any other target is reported with
// LOG(FATAL).
void TargetFree(TargetType target, void* data) {
  switch (target) {
    case TargetType::kHost:
    case TargetType::kX86:
    case TargetType::kARM:
      TargetWrapper<TARGET(kHost)>::Free(data);
      break;

#ifdef LITE_WITH_CUDA
    case TargetType::kCUDA:
      // Free through the same wrapper specialization TargetMalloc allocates
      // with; releasing CUDA memory through a non-CUDA wrapper is invalid.
      TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Free(data);
      break;
#endif  // LITE_WITH_CUDA
    default:
      LOG(FATAL) << "Unknown type";
  }
}

// Synchronously copies `size` bytes from `src` to `dst` for `target`, using
// IoDirection::DtoD in every case. kHost, kX86 and kARM go through the host
// TargetWrapper; kCUDA is only handled when LITE_WITH_CUDA is defined. Any
// other target is reported with LOG(FATAL).
void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
  const bool uses_host_copy = target == TargetType::kHost ||
                              target == TargetType::kX86 ||
                              target == TargetType::kARM;
  if (uses_host_copy) {
    TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
                                             IoDirection::DtoD);
    return;
  }

#ifdef LITE_WITH_CUDA
  if (target == TargetType::kCUDA) {
    TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
                                             IoDirection::DtoD);
    return;
  }
#endif

  LOG(FATAL) << "unsupported type";
}

} // namespace lite
} // namespace paddle
57 changes: 8 additions & 49 deletions paddle/fluid/lite/core/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,57 +18,16 @@
namespace paddle {
namespace lite {

static void* TargetMalloc(TargetType target, size_t size) {
void* data{nullptr};
switch (target) {
case TargetType::kHost:
#ifdef LITE_WITH_X86
case TargetType::kX86:
#endif
data = TargetWrapper<TARGET(kHost)>::Malloc(size);
break;
#ifdef LITE_WITH_CUDA
case TargetType::kCUDA:
data =
TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t>::Malloc(size);
break;
#endif // LITE_WITH_CUDA
default:
LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
}
return data;
}

static void TargetFree(TargetType target, void* data) {
switch (static_cast<int>(target)) {
case static_cast<int>(TargetType::kX86):
TargetWrapper<TARGET(kX86)>::Free(data);
break;
case static_cast<int>(TargetType::kCUDA):
TargetWrapper<TARGET(kX86)>::Free(data);
break;
default:
LOG(FATAL) << "Unknown type";
}
}
// Malloc memory for a specific Target. All the targets should be an element in
// the `switch` here.
void* TargetMalloc(TargetType target, size_t size);

static void TargetCopy(TargetType target, void* dst, const void* src,
size_t size) {
switch (target) {
case TargetType::kX86:
case TargetType::kHost:
TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
// Free memory for a specific Target. All the targets should be an element in
// the `switch` here.
void TargetFree(TargetType target, void* data);

case TargetType::kCUDA:
TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
IoDirection::DtoD);
break;
default:
LOG(FATAL) << "unsupported type";
}
}
// Copy `size` bytes from `src` to `dst`, dispatching on `target`.
// NOTE(review): the definition passes IoDirection::DtoD for every target, so
// this looks like a same-target copy rather than host-to-target -- confirm.
void TargetCopy(TargetType target, void* dst, const void* src, size_t size);

// Memory buffer manager.
class Buffer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/lite/kernels/host/relu_compute.h"
#include "paddle/fluid/lite/core/memory.h"
#include <gtest/gtest.h>

namespace paddle {
namespace lite {

// Smoke test: a buffer allocated with TargetMalloc is non-null and can be
// handed back to TargetFree, for kX86 and (when built with CUDA) kCUDA.
TEST(memory, test) {
  auto* buf = TargetMalloc(TARGET(kX86), 10);
  ASSERT_TRUE(buf);
  TargetFree(TARGET(kX86), buf);

#ifdef LITE_WITH_CUDA
  auto* buf_cuda = TargetMalloc(TARGET(kCUDA), 10);
  ASSERT_TRUE(buf_cuda);
  // Use the TARGET() macro, as in the calls above.
  TargetFree(TARGET(kCUDA), buf_cuda);
#endif
}

} // namespace lite
} // namespace paddle
7 changes: 0 additions & 7 deletions paddle/fluid/lite/kernels/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,7 @@ cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps})
cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3)
cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} eigen3)

cc_library(feed_compute_arm SRCS feed_compute.cc DEPS ${lite_kernel_deps})
cc_library(fetch_compute_arm SRCS fetch_compute.cc DEPS ${lite_kernel_deps})

# lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_arm)

set(arm_kernels
feed_compute_arm
fetch_compute_arm
fc_compute_arm
relu_compute_arm
mul_compute_arm
Expand Down
Loading