
Commit 1850dfc

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into variable-keeping-with-ensor

2 parents: 67837ee + 32e3353


63 files changed: 1488 additions & 538 deletions

cmake/external/lite.cmake

Lines changed: 22 additions & 8 deletions
@@ -18,13 +18,21 @@ if(NOT LINUX)
   return()
 endif()

-if(XPU_SDK_ROOT)
-  set(LITE_WITH_XPU ON)
-  include_directories("${XPU_SDK_ROOT}/XTDK/include")
-  include_directories("${XPU_SDK_ROOT}/XTCL/include")
+if (LITE_WITH_XPU)
   add_definitions(-DLITE_SUBGRAPH_WITH_XPU)
-  LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/shlib/")
-  LINK_DIRECTORIES("${XPU_SDK_ROOT}/XTDK/runtime/shlib/")
+  IF(WITH_AARCH64)
+    SET(XPU_SDK_ENV "kylin_aarch64")
+  ELSEIF(WITH_SUNWAY)
+    SET(XPU_SDK_ENV "deepin_sw6_64")
+  ELSEIF(WITH_BDCENTOS)
+    SET(XPU_SDK_ENV "bdcentos_x86_64")
+  ELSEIF(WITH_UBUNTU)
+    SET(XPU_SDK_ENV "ubuntu_x86_64")
+  ELSEIF(WITH_CENTOS)
+    SET(XPU_SDK_ENV "centos7_x86_64")
+  ELSE ()
+    SET(XPU_SDK_ENV "ubuntu_x86_64")
+  ENDIF()
 endif()

 if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)

@@ -57,7 +65,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
             -DWITH_TESTING=OFF
             -DLITE_BUILD_EXTRA=ON
             -DLITE_WITH_XPU=${LITE_WITH_XPU}
-            -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+            -DXPU_SDK_URL=${XPU_BASE_URL}
+            -DXPU_SDK_ENV=${XPU_SDK_ENV}
             -DLITE_WITH_CODE_META_INFO=OFF
             -DLITE_WITH_ARM=ON)
     ExternalProject_Add(

@@ -99,7 +108,8 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
             -DLITE_WITH_STATIC_CUDA=OFF
             -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME}
             -DLITE_WITH_XPU=${LITE_WITH_XPU}
-            -DXPU_SDK_ROOT=${XPU_SDK_ROOT}
+            -DXPU_SDK_URL=${XPU_SDK_URL}
+            -DXPU_SDK_ENV=${XPU_SDK_ENV}
             -DLITE_WITH_CODE_META_INFO=OFF
             -DLITE_WITH_ARM=OFF)

@@ -147,6 +157,10 @@ message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
 message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
 include_directories(${LITE_SOURCE_DIR})
 include_directories(${LITE_BINARY_DIR})
+if(LITE_WITH_XPU)
+  include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xdnn/include/)
+  include_directories(${LITE_BINARY_DIR}/third_party/install/xpu/xre/include/)
+endif()

 function(external_lite_libs alias path)
   add_library(${alias} SHARED IMPORTED GLOBAL)

cmake/external/xpu.cmake

Lines changed: 5 additions & 6 deletions
@@ -33,7 +33,10 @@ ELSE ()
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
 ENDIF()

-SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
+IF(NOT XPU_BASE_URL)
+  SET(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20210527")
+ENDIF()
+
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)

@@ -93,11 +96,7 @@ ELSE(WITH_XPU_BKCL)
   TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
 ENDIF(WITH_XPU_BKCL)

-if(NOT XPU_SDK_ROOT)
-  ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
-else()
-  ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
-endif()
+ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})

 # Ensure that xpu/api.h can be included without dependency errors.
 file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")

paddle/fluid/framework/ir/op_compat_sensible_pass.cc

Lines changed: 0 additions & 3 deletions
@@ -75,9 +75,6 @@ AttrCompat& AttrCompat::IsLeftDefault() {
 }

 bool AttrCompat::operator()(const OpDesc& op_desc) {
-  if (conditions_.empty()) {
-    return true;
-  }
   if (!op_desc.HasAttr(attr_name_)) {
     if (!optional_) {
       LOG(WARNING) << "The non-optional Attr(" << attr_name_ << ") of Op ("
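
Note on behavior: with the early `conditions_.empty()` return gone, an AttrCompat that has no registered value conditions no longer passes unconditionally; the attribute must still exist on the op (or be marked optional). A minimal standalone sketch of the new control flow, using a hypothetical FakeOpDesc rather than Paddle's real OpDesc:

#include <functional>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for OpDesc, holding int-valued attributes only.
struct FakeOpDesc {
  std::map<std::string, int> attrs;
  bool HasAttr(const std::string& name) const { return attrs.count(name) > 0; }
  int GetAttr(const std::string& name) const { return attrs.at(name); }
};

bool AttrCompatCheck(const FakeOpDesc& op, const std::string& attr_name,
                     bool optional,
                     const std::vector<std::function<bool(int)>>& conditions) {
  // Before this commit, `if (conditions.empty()) return true;` sat here, so
  // an attribute with no conditions passed even when absent from the op.
  if (!op.HasAttr(attr_name)) {
    return optional;  // missing + non-optional -> incompatible
  }
  const int value = op.GetAttr(attr_name);
  for (const auto& cond : conditions) {
    if (!cond(value)) return false;
  }
  return true;  // an empty condition list still passes once the attr exists
}

int main() {
  FakeOpDesc op;  // no attributes set
  // A missing non-optional attribute now fails even with no conditions:
  return AttrCompatCheck(op, "epsilon", /*optional=*/false, {}) ? 1 : 0;
}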

paddle/fluid/inference/tensorrt/op_teller.cc

Lines changed: 1 addition & 1 deletion
@@ -694,7 +694,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         return false;
         // Paddle-TRT does not support the input tensors: Shape and ShapeTensor
       } else if (desc.Input("Shape").size() >= 1 ||
-                 desc.Input("ShapeTensor").size() >= 1 || with_dynamic_shape) {
+                 desc.Input("ShapeTensor").size() >= 1) {
         return false;
       } else {
         std::vector<int> shape =
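
Effect of the change: dynamic-shape mode by itself no longer forces a rejection here; only ops that feed their target shape through the Shape or ShapeTensor inputs are turned away. A small standalone sketch of the simplified predicate (hypothetical function name, simplified from the real teller):

#include <cstddef>
#include <iostream>

// After this commit, dynamic-shape mode alone no longer disqualifies the op;
// only the Shape / ShapeTensor inputs force a rejection.
bool ShapeInputsAllowTrt(std::size_t shape_input_count,
                         std::size_t shape_tensor_input_count) {
  // Before: `|| with_dynamic_shape` was also part of the rejection condition.
  return shape_input_count == 0 && shape_tensor_input_count == 0;
}

int main() {
  // An op with no Shape/ShapeTensor inputs is now eligible even when the
  // engine runs with dynamic shape.
  std::cout << ShapeInputsAllowTrt(0, 0) << "\n";  // 1
  std::cout << ShapeInputsAllowTrt(1, 0) << "\n";  // 0
  return 0;
}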

paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu

Lines changed: 1 addition & 61 deletions
@@ -182,69 +182,9 @@ int LayerNormPluginDynamic::enqueue(
     paddle::operators::LayerNormDirectCUDAFunctor<float> layer_norm;
     layer_norm(stream, input, input_shape, bias_d, scale_d, output, mean_d,
               variance_d, begin_norm_axis, eps);
-  } else if (input_type == nvinfer1::DataType::kHALF) {
-#ifdef TRT_PLUGIN_FP16_AVALIABLE
-    VLOG(1) << "TRT Plugin DataType selected. LayerNorm-->fp16";
-    const half *input = reinterpret_cast<const half *>(inputs[0]);
-    half *output = static_cast<half *>(outputs[0]);
-    size_t mean_shape_product = 1;
-    for (auto s : mean_shape_) {
-      mean_shape_product *= s;
-    }
-    size_t variance_shape_product = 1;
-    for (auto s : variance_shape_) {
-      variance_shape_product *= s;
-    }
-    if (!scale_gpu_half_d_) {
-      cudaMalloc(&scale_gpu_half_d_, feature_size * sizeof(half));
-    }
-    if (!bias_gpu_half_d_) {
-      cudaMalloc(&bias_gpu_half_d_, feature_size * sizeof(half));
-    }
-    if (!mean_gpu_half_d_) {
-      cudaMalloc(&mean_gpu_half_d_, mean_shape_product * sizeof(half));
-    }
-    if (!variance_gpu_half_d_) {
-      cudaMalloc(&variance_gpu_half_d_, variance_shape_product * sizeof(half));
-    }
-
-    half *scale_cpu_half =
-        static_cast<half *>(malloc(feature_size * sizeof(half)));
-    half *bias_cpu_half =
-        static_cast<half *>(malloc(feature_size * sizeof(half)));
-    PADDLE_ENFORCE_EQ(
-        scale_cpu_half && bias_cpu_half, true,
-        platform::errors::Unavailable("Out of memory, malloc size %d.",
-                                      feature_size * sizeof(half)));
-
-    for (int i = 0; i < feature_size; i++) {
-      scale_cpu_half[i] = static_cast<half>(scale_[i]);
-      bias_cpu_half[i] = static_cast<half>(bias_[i]);
-    }
-    cudaMemcpyAsync(scale_gpu_half_d_, scale_cpu_half,
-                    sizeof(half) * feature_size, cudaMemcpyHostToDevice,
-                    stream);
-    cudaMemcpyAsync(bias_gpu_half_d_, bias_cpu_half,
-                    sizeof(half) * feature_size, cudaMemcpyHostToDevice,
-                    stream);
-    free(scale_cpu_half);
-    free(bias_cpu_half);
-
-    paddle::operators::LayerNormDirectCUDAFunctor<half> layer_norm;
-    layer_norm(stream, input, input_shape, bias_gpu_half_d_, scale_gpu_half_d_,
-               output, mean_gpu_half_d_, variance_gpu_half_d_, begin_norm_axis,
-               eps);
-#else
-    PADDLE_THROW(platform::errors::Fatal(
-        "The layer_norm tensorRT plugin should be "
-        "complied with CUDA version >= 10.0 when running with fp16. "
-        "Please recomplie it or try to use fp32 by set "
-        "config.SetTRTDynamicShapeInfo(min_input_shape, "
-        "max_input_shape, opt_input_shape, true"));
-#endif
   } else {
     PADDLE_THROW(platform::errors::Fatal(
-        "The LayerNorm TRT Plugin's input type should be float or half."));
+        "The LayerNorm TRT Plugin's input type should be float."));
   }
   return cudaGetLastError() != cudaSuccess;
 }
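
With the fp16 branch deleted, enqueue now dispatches only on kFLOAT, and everything else hits the fatal-error branch. A standalone sketch of the resulting dispatch, using a hypothetical DataType enum in place of nvinfer1::DataType:

#include <stdexcept>

// Hypothetical enum standing in for nvinfer1::DataType.
enum class DataType { kFLOAT, kHALF, kINT8 };

void EnqueueLayerNorm(DataType input_type) {
  if (input_type == DataType::kFLOAT) {
    // ... the real plugin runs LayerNormDirectCUDAFunctor<float> here ...
  } else {
    // Half inputs now land here instead of the deleted fp16 code path.
    throw std::runtime_error(
        "The LayerNorm TRT Plugin's input type should be float.");
  }
}

int main() {
  EnqueueLayerNorm(DataType::kFLOAT);   // accepted
  try {
    EnqueueLayerNorm(DataType::kHALF);  // now rejected
  } catch (const std::runtime_error&) {
  }
  return 0;
}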

paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h

Lines changed: 2 additions & 29 deletions
@@ -114,22 +114,14 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT {
       : begin_norm_axis_(begin_norm_axis),
         eps_(eps),
         mean_shape_(mean_shape),
-        variance_shape_(variance_shape),
-        scale_gpu_half_d_(nullptr),
-        bias_gpu_half_d_(nullptr),
-        mean_gpu_half_d_(nullptr),
-        variance_gpu_half_d_(nullptr) {
+        variance_shape_(variance_shape) {
     bias_.resize(bias_num);
     scale_.resize(scale_num);
     std::copy(bias, bias + bias_num, bias_.data());
     std::copy(scale, scale + scale_num, scale_.data());
   }

-  LayerNormPluginDynamic(void const* serialData, size_t serialLength)
-      : scale_gpu_half_d_(nullptr),
-        bias_gpu_half_d_(nullptr),
-        mean_gpu_half_d_(nullptr),
-        variance_gpu_half_d_(nullptr) {
+  LayerNormPluginDynamic(void const* serialData, size_t serialLength) {
     DeserializeValue(&serialData, &serialLength, &bias_);
     DeserializeValue(&serialData, &serialLength, &scale_);
     DeserializeValue(&serialData, &serialLength, &begin_norm_axis_);

@@ -190,21 +182,6 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT {
                                        const nvinfer1::DataType* inputTypes,
                                        int nbInputs) const override;

-  ~LayerNormPluginDynamic() {
-    if (scale_gpu_half_d_) {
-      cudaFree(scale_gpu_half_d_);
-    }
-    if (bias_gpu_half_d_) {
-      cudaFree(bias_gpu_half_d_);
-    }
-    if (mean_gpu_half_d_) {
-      cudaFree(mean_gpu_half_d_);
-    }
-    if (variance_gpu_half_d_) {
-      cudaFree(variance_gpu_half_d_);
-    }
-  }
-
   void destroy() override { delete this; }

  private:

@@ -218,10 +195,6 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT {
   float eps_;
   std::vector<int64_t> mean_shape_;
   std::vector<int64_t> variance_shape_;
-  half* scale_gpu_half_d_;
-  half* bias_gpu_half_d_;
-  half* mean_gpu_half_d_;
-  half* variance_gpu_half_d_;
 };

 class LayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator {
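
This header change follows from the .cu change above: once the lazily allocated half buffers are gone, the class owns no raw device pointers, so the hand-written destructor becomes unnecessary. A generic before/after illustration of that simplification (hypothetical types, plain host memory standing in for cudaMalloc/cudaFree):

#include <cstdlib>

// Hypothetical illustration, not Paddle code: a class that owns a raw buffer
// needs a user-declared destructor; one that owns nothing does not.
struct OwnsRawBuffer {
  float* buf = nullptr;  // lazily allocated, must be freed by hand
  ~OwnsRawBuffer() {
    if (buf) std::free(buf);  // mirrors the removed cudaFree calls
  }
};

struct OwnsNothing {
  // no raw pointers -> the implicit destructor suffices,
  // as in the trimmed plugin class
};

int main() {
  OwnsRawBuffer a;
  a.buf = static_cast<float*>(std::malloc(4 * sizeof(float)));
  OwnsNothing b;
  (void)b;
  return 0;  // a.buf is released by ~OwnsRawBuffer
}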

paddle/fluid/operators/compat/batch_norm.pbtxt

Lines changed: 15 additions & 15 deletions
@@ -18,6 +18,21 @@ def {
   outputs {
     name: "Y"
   }
+  outputs {
+    name: "MeanOut"
+  }
+  outputs {
+    name: "VarianceOut"
+  }
+  outputs {
+    name: "SavedMean"
+  }
+  outputs {
+    name: "SavedVariance"
+  }
+  outputs {
+    name: "ReserveSpace"
+  }
   attrs {
     name: "epsilon"
     type: FLOAT

@@ -55,21 +70,6 @@ extra {
     name: "trainable_statistics"
     type: BOOLEAN
   }
-  outputs {
-    name: "MeanOut"
-  }
-  outputs {
-    name: "VarianceOut"
-  }
-  outputs {
-    name: "SavedMean"
-  }
-  outputs {
-    name: "SavedVariance"
-  }
-  outputs {
-    name: "ReserveSpace"
-  }
   attrs {
     name: "op_role"
     type: INT

paddle/fluid/operators/compat/conv2d.pbtxt

Lines changed: 4 additions & 4 deletions
@@ -32,6 +32,10 @@ def {
     name: "dilations"
     type: INTS
   }
+  attrs {
+    name: "data_format"
+    type: STRING
+  }
 }
 extra {
   inputs {

@@ -113,10 +117,6 @@ extra {
     name: "force_fp32_output"
     type: BOOLEAN
   }
-  attrs {
-    name: "data_format"
-    type: STRING
-  }
   attrs {
     name: "workspace_size_MB"
     type: INT

paddle/fluid/operators/compat/conv2d_transpose.pbtxt

Lines changed: 5 additions & 5 deletions
@@ -1,4 +1,4 @@
-type: "reduce_mean"
+type: "conv2d_transpose"
 def {
   inputs {
     name: "Input"

@@ -40,6 +40,10 @@ def {
     name: "padding_algorithm"
     type: STRING
   }
+  attrs {
+    name: "data_format"
+    type: STRING
+  }
 }
 extra {
   attrs {

@@ -78,10 +82,6 @@ extra {
     name: "fuse_beta"
     type: FLOAT
   }
-  attrs {
-    name: "data_format"
-    type: STRING
-  }
   attrs {
     name: "workspace_size_MB"
     type: INT
