Skip to content

Commit 0063af1

Browse files
committed
[Inference] optimize some code and fix some bugs (PaddlePaddle#48780)
* clean ir_pass_manager and fix map_depthwise_conv_to_conv_pass * fix unit test timeout
1 parent f1d4a67 commit 0063af1

File tree

8 files changed

+56
-64
lines changed

8 files changed

+56
-64
lines changed

paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ void FillConstData(LoDTensor* out_t, T value) {
2929
}
3030

3131
void DeleteFillConstantOpPass::ApplyImpl(ir::Graph* graph) const {
32+
bool with_dynamic_shape = Get<bool>("with_dynamic_shape");
33+
// Not supported when dynamic shape is enabled; skip this pass.
34+
if (with_dynamic_shape) {
35+
return;
36+
}
3237
FusePassBase::Init("delete_fill_constant_op_pass", graph);
3338
GraphPatternDetector detector;
3439
auto fill_constant_op =

paddle/fluid/framework/ir/float_to_half_pass.cc

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@
1616

1717
#include "paddle/fluid/framework/ir/graph_helper.h"
1818
#include "paddle/fluid/framework/operator.h"
19-
#include "paddle/phi/common/data_type.h"
19+
#include "paddle/phi/common/bfloat16.h"
20+
#include "paddle/phi/common/float16.h"
21+
#include "paddle/phi/common/place.h"
22+
#include "paddle/phi/core/dense_tensor.h"
23+
#include "paddle/phi/core/enforce.h"
24+
#include "paddle/phi/core/errors.h"
2025

2126
namespace paddle {
2227
namespace framework {
@@ -620,34 +625,45 @@ void FloatToHalfPass::ConvertWeightsData() const {
620625
for (const auto& var_name : var_names) {
621626
if (vars_convert_to_half_.count(var_name)) {
622627
VLOG(4) << var_name << "'s data type was convert to half";
623-
#define CONVERT_TENSOR_DTYPE(DTYPE, dtype) \
624-
half_tensor.set_type(DTYPE); \
625-
auto* half_data = half_tensor.mutable_data<dtype>(platform::CPUPlace()); \
626-
for (int64_t i = 0; i < origin_tensor->numel(); i++) { \
627-
half_data[i] = static_cast<dtype>(origin_data[i]); \
628-
} \
629-
origin_tensor->clear(); \
630-
paddle::framework::TensorCopySync( \
631-
half_tensor, platform::CPUPlace(), origin_tensor)
632628

633629
auto* var = scope->FindLocalVar(var_name);
634-
635-
if (var->IsType<phi::DenseTensor>()) {
636-
auto* origin_tensor = var->GetMutable<phi::DenseTensor>();
637-
phi::DenseTensor half_tensor;
638-
half_tensor.Resize(origin_tensor->dims());
639-
auto* origin_data =
640-
origin_tensor->mutable_data<float>(platform::CPUPlace());
641-
if (half_precision_ == phi::DataType::FLOAT16) {
642-
CONVERT_TENSOR_DTYPE(paddle::experimental::DataType::FLOAT16,
643-
phi::dtype::float16);
644-
} else if (half_precision_ == phi::DataType::BFLOAT16) {
645-
CONVERT_TENSOR_DTYPE(paddle::experimental::DataType::BFLOAT16,
646-
phi::dtype::bfloat16);
630+
CHECK_EQ(var->IsType<phi::DenseTensor>(), true);
631+
632+
auto* origin_tensor = var->GetMutable<phi::DenseTensor>();
633+
634+
phi::DenseTensor half_tensor;
635+
half_tensor.Resize(origin_tensor->dims());
636+
half_tensor.set_type(half_precision_);
637+
638+
if (half_precision_ == phi::DataType::FLOAT16) {
639+
auto* half_data =
640+
half_tensor.mutable_data<phi::dtype::float16>(phi::CPUPlace{});
641+
for (int64_t i = 0; i < origin_tensor->numel(); i++) {
642+
if (origin_tensor->dtype() == phi::DataType::FLOAT64) {
643+
auto* origin_data = origin_tensor->data<double>();
644+
half_data[i] = static_cast<phi::dtype::float16>(origin_data[i]);
645+
} else if (origin_tensor->dtype() == phi::DataType::FLOAT32) {
646+
auto* origin_data = origin_tensor->data<float>();
647+
half_data[i] = static_cast<phi::dtype::float16>(origin_data[i]);
648+
}
649+
}
650+
} else if (half_precision_ == phi::DataType::BFLOAT16) {
651+
auto* half_data =
652+
half_tensor.mutable_data<phi::dtype::bfloat16>(phi::CPUPlace{});
653+
for (int64_t i = 0; i < origin_tensor->numel(); i++) {
654+
if (origin_tensor->dtype() == phi::DataType::FLOAT64) {
655+
auto* origin_data = origin_tensor->data<double>();
656+
half_data[i] = static_cast<phi::dtype::bfloat16>(origin_data[i]);
657+
} else if (origin_tensor->dtype() == phi::DataType::FLOAT32) {
658+
auto* origin_data = origin_tensor->data<float>();
659+
half_data[i] = static_cast<phi::dtype::bfloat16>(origin_data[i]);
660+
}
647661
}
648662
}
663+
origin_tensor->clear();
664+
paddle::framework::TensorCopySync(
665+
half_tensor, phi::CPUPlace{}, origin_tensor);
649666
}
650-
#undef CONVERT_TENSOR_DTYPE
651667
}
652668
}
653669

paddle/fluid/framework/ir/float_to_half_pass.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@
2222
#include "paddle/fluid/framework/ir/node.h"
2323
#include "paddle/phi/common/backend.h"
2424
#include "paddle/phi/common/data_type.h"
25-
#include "paddle/phi/common/float16.h"
26-
#include "paddle/phi/common/layout.h"
27-
#include "paddle/phi/common/place.h"
2825

2926
namespace paddle {
3027
namespace framework {

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "paddle/fluid/framework/scope.h"
2828
#include "paddle/fluid/inference/analysis/argument.h"
2929
#include "paddle/fluid/string/pretty_log.h"
30+
#include "paddle/phi/core/errors.h"
3031

3132
namespace paddle {
3233
namespace inference {
@@ -306,42 +307,18 @@ void IRPassManager::CreatePasses(Argument *argument,
306307
}
307308

308309
std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
309-
if (passes_.empty()) {
310-
return graph;
311-
}
312310
PADDLE_ENFORCE_NOT_NULL(
313-
graph.get(),
314-
platform::errors::PreconditionNotMet("Graph cannot be NULL."));
311+
graph.get(), platform::errors::InvalidArgument("Graph cannot be null."));
315312
// Apply all the passes
316313
for (const auto &pass : passes_) {
317314
if (pass->Type() != "graph_viz_pass" && !disable_logs_) {
318315
PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
319316
}
320-
// delete_fill_constant_op_pass is not apply under trt dynamic shape
321-
if (pass->Type() == "delete_fill_constant_op_pass") {
322-
bool use_dynamic = pass->Get<bool>("with_dynamic_shape");
323-
if (use_dynamic) continue;
324-
}
325317
graph.reset(pass->Apply(graph.release()));
326318
}
327319
return graph;
328320
}
329321

330-
framework::proto::ProgramDesc IRPassManager::AcquireProgram(
331-
std::unique_ptr<Graph> *graph, ProgramDesc *program) const {
332-
auto pass =
333-
framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");
334-
335-
// Direct using ProgramDesc desc(argument->main_program()) may cause
336-
// incomplete copies of information.
337-
ProgramDesc desc;
338-
desc.CopyFrom(*program->Proto());
339-
pass->SetNotOwned("program", &desc);
340-
auto *the_graph = graph->release();
341-
graph->reset(pass->Apply(the_graph));
342-
return *desc.Proto();
343-
}
344-
345322
} // namespace analysis
346323
} // namespace inference
347324
} // namespace paddle

paddle/fluid/inference/analysis/ir_pass_manager.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,9 @@ class IRPassManager final {
4848

4949
std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph);
5050

51-
framework::proto::ProgramDesc AcquireProgram(std::unique_ptr<Graph> *graph,
52-
ProgramDesc *program) const;
53-
54-
framework::ir::Graph &graph() const { return *graph_; }
55-
5651
private:
5752
void CreatePasses(Argument *argument, const std::vector<std::string> &passes);
5853

59-
std::unique_ptr<Graph> graph_;
6054
std::vector<std::unique_ptr<Pass>> passes_;
6155
bool disable_logs_{false};
6256
};

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
108108
}
109109
#else
110110
LOG(ERROR) << "Please use PaddlePaddle with GPU version.";
111+
use_gpu_ = false;
111112
#endif
112113

113114
Update();
@@ -292,7 +293,7 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
292293

293294
if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
294295
PADDLE_THROW(platform::errors::InvalidArgument(
295-
"invalid key {} in IPU config", key));
296+
"invalid key %s in IPU config: ", key));
296297
}
297298
switch (ipu_config_mapper_.at(key)) {
298299
case ipu_config_code::ipu_device_num:
@@ -325,10 +326,12 @@ void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
325326
case ipu_config_code::ipu_custom_patterns:
326327
ipu_custom_patterns_ = string2vector(value);
327328
break;
328-
329+
case ipu_config_code::ipu_enable_model_runtime_executor:
330+
ipu_enable_model_runtime_executor_ = string2bool(value);
331+
break;
329332
default:
330333
PADDLE_THROW(platform::errors::InvalidArgument(
331-
"invalid key {} in IPU config", key));
334+
"invalid key %s in IPU config", key));
332335
break;
333336
}
334337
}
@@ -1377,7 +1380,7 @@ bool AnalysisConfig::trt_allow_build_at_runtime() {
13771380
return trt_allow_build_at_runtime_;
13781381
}
13791382

1380-
void AnalysisConfig::Exp_DisableMixedInferOps(
1383+
void AnalysisConfig::Exp_DisableMixedPrecisionOps(
13811384
const std::unordered_set<std::string> &black_list) {
13821385
mixed_black_list_ = black_list;
13831386
}

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -971,7 +971,7 @@ struct PD_INFER_DECL AnalysisConfig {
971971
/// interface is in the experimental stage and may change in the future. Note
972972
/// that the blacklist must be the same as the model conversion blacklist.
973973
///
974-
void Exp_DisableMixedInferOps(
974+
void Exp_DisableMixedPrecisionOps(
975975
const std::unordered_set<std::string>& black_list);
976976

977977
void SetApplyOptim(bool value) { apply_optim_ = value; }

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ if(WITH_GPU)
418418
analyzer_ernie_tester.cc)
419419
inference_analysis_api_test(gpu_ernie_half_test ${ERNIE_INSTALL_DIR}
420420
gpu_ernie_half_test.cc)
421-
set_tests_properties(gpu_ernie_half_test PROPERTIES TIMEOUT 40)
421+
set_tests_properties(gpu_ernie_half_test PROPERTIES TIMEOUT 60)
422422
endif()
423423
inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR}
424424
analyzer_ernie_int8_tester.cc)

0 commit comments

Comments
 (0)