Skip to content

Commit 86d6b14

Browse files
committed
follow comments
1 parent 136524b commit 86d6b14

12 files changed

Lines changed: 72 additions & 74 deletions

File tree

paddle/fluid/lite/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ endfunction()
118118

119119
add_subdirectory(core)
120120
add_subdirectory(x86)
121+
add_subdirectory(arm)
121122
add_subdirectory(host)
122123
add_subdirectory(cuda)
123124
add_subdirectory(operators)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
add_subdirectory(math)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11

2-
cc_library(math_arm SRCS funcs.cc packed_sgemm.cc DEPS ${lite_kernel_deps})
2+
cc_library(math_arm SRCS funcs.cc packed_sgemm.cc DEPS ${lite_kernel_deps} eigen3)

paddle/fluid/lite/kernels/arm/math/funcs.cc renamed to paddle/fluid/lite/arm/math/funcs.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#include "paddle/fluid/lite/kernels/arm/math/funcs.h"
15+
#include "paddle/fluid/lite/arm/math/funcs.h"
1616
#include <arm_neon.h>
1717

1818
namespace paddle {
1919
namespace lite {
20-
namespace kernels {
2120
namespace arm {
2221
namespace math {
2322

@@ -153,6 +152,5 @@ void fill_bias_fc<int>(int *tensor, const int *bias, const int num,
153152

154153
} // namespace math
155154
} // namespace arm
156-
} // namespace kernels
157155
} // namespace lite
158156
} // namespace paddle

paddle/fluid/lite/kernels/arm/math/funcs.h renamed to paddle/fluid/lite/arm/math/funcs.h

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,40 @@
1414

1515
#pragma once
1616

17+
#include <Eigen/Core>
1718
#include <cmath>
18-
#include "paddle/fluid/lite/kernels/arm/math/packed_sgemm.h"
19+
20+
#include "paddle/fluid/lite/arm/math/packed_sgemm.h"
1921

2022
namespace paddle {
2123
namespace lite {
22-
namespace kernels {
2324
namespace arm {
2425
namespace math {
2526

2627
template <typename T>
2728
void fill_bias_fc(T* tensor, const T* bias, const int num, const int channel);
2829

30+
// Fully-connected forward pass computed with Eigen: out = x * w (+ b).
//
// x   : read as a row-major x_h-by-x_w matrix.
// w   : read as a row-major w_h-by-w_w matrix.
// b   : optional bias of w_w elements; pass a null pointer to skip the bias.
// out : written as a row-major x_h-by-w_w matrix.
//
// NOTE(review): nothing here checks that x_w == w_h; presumably Eigen's own
// debug assertions catch a mismatch - confirm before relying on it.
template <typename T>
31+
void fc_compute_eigen(const T* x, int x_h, int x_w, //
32+
const T* w, int w_h, int w_w, //
33+
const T* b, //
34+
T* out) {
35+
using matrix_t =
36+
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
37+
38+
Eigen::Map<const matrix_t> X(x, x_h, x_w);  // wraps the caller's x buffer
39+
Eigen::Map<const matrix_t> W(w, w_h, w_w);  // wraps the caller's w buffer
40+
Eigen::Map<matrix_t> Out(out, x_h, w_w);  // writable wrapper over out
41+
42+
Out = X * W;  // matrix product, stored through the Out map into `out`
43+
44+
if (b) {  // bias is optional
45+
Eigen::Map<const Eigen::Matrix<T, 1, Eigen::Dynamic>> B(b, w_w);  // 1 x w_w row vector
46+
Out = Out.array().rowwise() + B.array();  // add B to every row of Out
47+
}
48+
}
49+
2950
} // namespace math
3051
} // namespace arm
31-
} // namespace kernels
3252
} // namespace lite
3353
} // namespace paddle

paddle/fluid/lite/kernels/arm/math/packed_sgemm.cc renamed to paddle/fluid/lite/arm/math/packed_sgemm.cc

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#include "paddle/fluid/lite/kernels/arm/math/packed_sgemm.h"
15+
#include "paddle/fluid/lite/arm/math/packed_sgemm.h"
1616
#include <arm_neon.h>
1717

1818
namespace paddle {
1919
namespace lite {
20-
namespace kernels {
2120
namespace arm {
2221
namespace math {
2322

@@ -68,7 +67,7 @@ void prepackA(float *out, const float *in, const int ldin, const int m0,
6867
prepackA_8x12(out, in, ldin, m0, mmax, k0, kmax);
6968
}
7069
#else
71-
if (ctx->get_arch() == kA73) {
70+
if (ctx->arch() == kA73) {
7271
if (is_trans) {
7372
prepackA_trans_4x8(out, in, ldin, m0, mmax, k0, kmax);
7473
} else {
@@ -86,7 +85,7 @@ void prepackA(float *out, const float *in, const int ldin, const int m0,
8685

8786
void prepackA(TensorLite *tout, const TensorLite &tin, int m, int k, int group,
8887
bool is_trans, ARMContext *ctx) {
89-
int hblock = get_hblock(ctx->get_arch());
88+
int hblock = get_hblock(ctx->arch());
9089
int m_roundup = hblock * ((m + hblock - 1) / hblock);
9190
int group_size_round_up = ((m_roundup * k + 15) / 16) * 16;
9291
if (tout->numel() < group_size_round_up * group) {
@@ -112,7 +111,7 @@ void sgemm_prepack(const float *A_packed, const float *B, const float *bias,
112111
sgemm_conv_8x12(A_packed, B, bias, C, M, N, K, is_bias, is_relu, is_transB,
113112
ctx);
114113
#else // armv7
115-
if (ctx->get_arch() == kA73) {
114+
if (ctx->arch() == kA73) {
116115
sgemm_conv_4x8(A_packed, B, bias, C, M, N, K, is_bias, is_relu, is_transB,
117116
ctx);
118117
} else {
@@ -1521,8 +1520,8 @@ void sgemm_conv_8x12(const float *A_packed, const float *B, const float *bias,
15211520
bool transB, ARMContext *ctx) {
15221521
size_t l2_cache =
15231522
ctx->l2_cache_size() > 0 ? ctx->l2_cache_size() : 512 * 1024;
1524-
float *workspace = ctx->get_workspace_data<float>();
1525-
int threads = ctx->get_threads();
1523+
float *workspace = ctx->workspace_data<float>();
1524+
int threads = ctx->threads();
15261525
//! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2
15271526
int x_block = (l2_cache - (MBLOCK * K)) / (sizeof(float) * (K + MBLOCK));
15281527
x_block /= NBLOCK;
@@ -2359,8 +2358,8 @@ void sgemm_conv_6x8(const float* A_packed, const float* B, const float* bias,
23592358
bool transB, ARMContext* ctx) {
23602359
size_t l2_cache =
23612360
ctx->l2_cache_size() > 0 ? ctx->l2_cache_size() : 512 * 1024;
2362-
auto* workspace = ctx->get_workspace_data<float>();
2363-
int threads = ctx->get_threads();
2361+
auto* workspace = ctx->workspace_data<float>();
2362+
int threads = ctx->threads();
23642363
//! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2
23652364
int x_block =
23662365
(l2_cache - (MBLOCK_OTH * K)) / (sizeof(float) * (K + MBLOCK_OTH));
@@ -2753,7 +2752,7 @@ void sgemm_conv_4x8(const float* A_packed, const float* B, const float* bias,
27532752
size_t l2_cache =
27542753
ctx->l2_cache_size() > 0 ? ctx->l2_cache_size() : 512 * 1024;
27552754
void* workspace = ctx->get_work_space();
2756-
int threads = ctx->get_threads();
2755+
int threads = ctx->threads();
27572756
//! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2
27582757
int x_block =
27592758
(l2_cache - (MBLOCK_A73 * K)) / (sizeof(float) * (K + MBLOCK_A73));
@@ -3046,6 +3045,5 @@ void sgemm_conv_4x8(const float* A_packed, const float* B, const float* bias,
30463045

30473046
} // namespace math
30483047
} // namespace arm
3049-
} // namespace kernels
30503048
} // namespace lite
30513049
} // namespace paddle

paddle/fluid/lite/kernels/arm/math/packed_sgemm.h renamed to paddle/fluid/lite/arm/math/packed_sgemm.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
namespace paddle {
2323
namespace lite {
24-
namespace kernels {
2524
namespace arm {
2625
namespace math {
2726

@@ -57,6 +56,5 @@ void sgemm_prepack(const float* A_packed, const float* B, const float* bias,
5756

5857
} // namespace math
5958
} // namespace arm
60-
} // namespace kernels
6159
} // namespace lite
6260
} // namespace paddle

paddle/fluid/lite/core/context.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ namespace lite {
3333

3434
#ifdef LITE_WITH_ARM
3535

36-
void ARMContext::set_cache(int l1size, int l2size, int l3size) {
36+
void ARMContext::SetCache(int l1size, int l2size, int l3size) {
3737
DeviceInfo& dev = DeviceInfo::Global();
3838
int cpu_count = arm_get_cpucount();
3939
dev.L1_cache_.resize(cpu_count);
@@ -62,9 +62,9 @@ ARMContext::ARMContext() {
6262
#endif
6363
}
6464

65-
PowerMode ARMContext::get_mode() const { return mode_; }
65+
PowerMode ARMContext::mode() const { return mode_; }
6666

67-
int ARMContext::get_threads() const { return active_ids_.size(); }
67+
int ARMContext::threads() const { return active_ids_.size(); }
6868

6969
ARMContext::ARMContext(const ARMContext& ctx) {
7070
mode_ = ctx.mode_;
@@ -83,7 +83,7 @@ ARMContext& ARMContext::operator=(const ARMContext& ctx) {
8383
return *this;
8484
}
8585

86-
void ARMContext::bind_dev() {
86+
void ARMContext::BindDev() {
8787
#ifdef USE_OPENMP
8888
int num_threads = active_ids_.size();
8989
omp_set_num_threads(num_threads);
@@ -116,7 +116,7 @@ void ARMContext::bind_dev() {
116116
#endif // USE_OPENMP
117117
}
118118

119-
void ARMContext::set_run_mode(PowerMode mode, int threads) {
119+
void ARMContext::SetRunMode(PowerMode mode, int threads) {
120120
DeviceInfo& dev = DeviceInfo::Global();
121121
int big_core_size = dev.big_core_ids_.size();
122122
int small_core_size = dev.little_core_ids_.size();
@@ -271,7 +271,7 @@ void ARMContext::set_run_mode(PowerMode mode, int threads) {
271271
omp_set_num_threads(threads);
272272
} else {
273273
if (check_online(active_ids_)) {
274-
bind_dev();
274+
BindDev();
275275
} else {
276276
LOG(ERROR) << "core id " << active_ids_[0]
277277
<< " is offline, switch to NO BIND MODE";
@@ -293,9 +293,9 @@ void ARMContext::set_run_mode(PowerMode mode, int threads) {
293293
arch_ = DeviceInfo::Global().archs_[active_ids_[0]];
294294
}
295295

296-
ARMArch ARMContext::get_arch() const { return arch_; }
296+
ARMArch ARMContext::arch() const { return arch_; }
297297

298-
void ARMContext::set_arch(ARMArch arch) { arch_ = arch; }
298+
void ARMContext::SetArch(ARMArch arch) { arch_ = arch; }
299299

300300
int ARMContext::l1_cache_size() const {
301301
DeviceInfo& dev = DeviceInfo::Global();
@@ -312,7 +312,7 @@ int ARMContext::l3_cache_size() const {
312312
return dev.L3_cache_[active_ids_[0]];
313313
}
314314

315-
bool ARMContext::workspace_extend(DDimLite dims) {
315+
bool ARMContext::ExtendWorkspace(DDimLite dims) {
316316
auto count = dims.product();
317317
auto old = workspace_.dims();
318318
if (count == old.product()) {

paddle/fluid/lite/core/context.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,26 +45,29 @@ struct ARMContext {
4545

4646
ARMContext& operator=(const ARMContext& ctx);
4747

48-
void set_run_mode(PowerMode mode, int threads);
49-
void bind_dev();
50-
PowerMode get_mode() const;
51-
int get_threads() const;
52-
void set_cache(int l1size, int l2size, int l3size);
48+
void SetRunMode(PowerMode mode, int threads);
49+
void SetCache(int l1size, int l2size, int l3size);
50+
void SetArch(ARMArch arch);
51+
void BindDev();
52+
53+
PowerMode mode() const;
54+
int threads() const;
55+
ARMArch arch() const;
56+
5357
template <typename T>
54-
T* get_workspace_data() {
58+
T* workspace_data() {
5559
return workspace_.mutable_data<T>();
5660
}
57-
ARMArch get_arch() const;
58-
void set_arch(ARMArch arch);
61+
5962
int l1_cache_size() const;
6063
int l2_cache_size() const;
6164
int l3_cache_size() const;
62-
bool workspace_extend(DDimLite dims);
65+
bool ExtendWorkspace(DDimLite dims);
6366

6467
private:
65-
//! LITE_POWER_HIGH stands for using big cores,
66-
//! LITE_POWER_LOW stands for using small core,
67-
//! LITE_POWER_FULL stands for using all cores
68+
// LITE_POWER_HIGH stands for using big cores,
69+
// LITE_POWER_LOW stands for using small core,
70+
// LITE_POWER_FULL stands for using all cores
6871
ARMArch arch_;
6972
PowerMode mode_;
7073
std::vector<int> active_ids_;

paddle/fluid/lite/kernels/arm/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ endif()
44

55
message(STATUS "compile with lite ARM kernels")
66

7-
add_subdirectory(math)
8-
97
cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
108
cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps})
119
cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} eigen3)

0 commit comments

Comments
 (0)