Changes from all commits (34 commits)
418edae
auto parallel bf16 (#49079)
xu98bin Dec 29, 2022
3c2420a
xpu kernels support api int64 vector inputs, test=kunlun (#49336)
ykkk2333 Dec 29, 2022
a30e360
Add scale and floor_divide ut cases (#49418)
Bobholamovic Dec 29, 2022
ffa32e4
[D2SCinn]Support deliver skip_gc_vars into Graph (#49411)
Aurelius84 Dec 29, 2022
9e6007f
[fluid remove] rawconv (#49395)
wangzhen38 Dec 29, 2022
3481ff5
fix_static_problem (#49439)
risemeup1 Dec 29, 2022
839e149
fix_bug (#49390)
risemeup1 Dec 29, 2022
3ffcd69
Yj/rm legacy part 0 (#49424)
yjjiang11 Dec 30, 2022
8a85955
Fix default GetExpectedKernelType for ops supported tensor attrs (#49…
0x45f Dec 30, 2022
162f8fe
fix_mac_build_problem (#49435)
risemeup1 Dec 30, 2022
a186e60
Unify the English translation of static graph mode and dynamic graph mode in the documentation (#49170)
sanbuphy Dec 30, 2022
72973d5
[clean fluid api] Move fluid/contrib/slim and remove fluid api. (#48717)
zzjjay Dec 30, 2022
9f5afa6
[Custom Extension] Polish xpu testcase (#49158)
jiahy0825 Dec 30, 2022
18f0ab8
fix possible bug (#49367)
jiahy0825 Dec 30, 2022
23c1ac2
Support static graph code-gen for squeeze and unsqueeze op (#49430)
zyfncg Dec 30, 2022
b2f4182
unit test of reduce with zero dim (#49436)
sljlp Dec 30, 2022
0111d01
delete batch_norm (#49396)
risemeup1 Dec 30, 2022
802c579
revert phi_static (#49433)
zhiqiu Dec 30, 2022
6e5917e
[inference][trt] update Convolution to ConvolutionNd (#47653)
zhangjun Dec 30, 2022
a4b4343
Fix test_conv_bn_fuse_pass_cc on Windows System (#49446)
zyfncg Dec 30, 2022
69c7edc
[Custom device] Add custom_cpu testcase of custom_relu (#49300)
jiahy0825 Dec 30, 2022
3e8cec8
[CI-Precision] Optimize precision test logic (#49441)
zhangbo9674 Dec 30, 2022
5c4adfa
check weight shape of conv1d_transpose (#49417)
wangxinxin08 Dec 30, 2022
291cf82
[ bugfix ] fix bugs in Indexable and support LayerDict (#49409)
2742195759 Dec 30, 2022
4458a1e
speedup lcov (#49476)
zhangbo9674 Dec 30, 2022
cb22a5c
support flip 0D (#49460)
Caozhou1995 Dec 30, 2022
aa96ddc
memory_optimize remove inplace op (#49431)
linsheng011 Jan 1, 2023
18c0a00
Scale Matmul Fuse pass rewritten (#49105)
HulekJakub Jan 2, 2023
c5137b2
[Auto Parallel] Add All Relu Flops (#48083)
CjhHa1 Jan 3, 2023
822ea0f
Add not_equal trt converter (#49393)
sanbuphy Jan 3, 2023
fe0dc40
[FluidAPI]remove clip api (#48946)
wj-Mcat Jan 3, 2023
121eaea
fix nvcc_lazy error on ninja build (#49448)
zhiqiu Jan 3, 2023
021085e
forbid ops who have 1D intermediate tensor entering Paddle-TRT (#49378)
zhoutianzi666 Jan 3, 2023
347d212
[Zero-Dim] reshape/reshape_/reverse 0D support (#49357)
zhaoyinglia Jan 3, 2023
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221227")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
1 change: 1 addition & 0 deletions cmake/generic.cmake
@@ -195,6 +195,7 @@ function(create_dummy_static_lib TARGET_NAME)
# the dummy target would be consisted of limit size libraries
set(limit ${merge_LIMIT})
list(LENGTH merge_LIBS libs_len)
message("libs_len ${libs_len}")
foreach(lib ${merge_LIBS})
list(APPEND merge_list ${lib})
list(LENGTH merge_list listlen)
@@ -739,6 +739,14 @@ def CollectBackwardInfo(self):
self.backward_returns_list,
) = ParseYamlBackward(backward_args_str, backward_returns_str)

# Remove the output which is intermediate
if 'intermediate' in grad_api_contents:
backward_returns_list_new = []
for return_item in self.backward_returns_list:
if return_item[0] not in grad_api_contents['intermediate']:
backward_returns_list_new.append(return_item)
self.backward_returns_list = backward_returns_list_new

def CollectForwardInfoFromBackwardContents(self):

backward_forward_str = self.backward_forward_str
@@ -1979,7 +1987,6 @@ def GenerateNodeDefinition(
fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}], input_metas[{fwd_position}]);\n"

inplace_grad_input_str = ""
inplaced_tensor_wrapper = False
inplace_check_str = ""
optional_inplace_var_name = []
# Grad Ins from TensorWrappers
2 changes: 1 addition & 1 deletion paddle/fluid/eager/autograd_meta.h
@@ -23,7 +23,7 @@ using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
*
* AutogradMeta is what record the backward info for tensor. When we run
* computation graph eagerly, we can not build a static paddle program like
* static mode do, so we need a new method to record forward info to trace
* static graph mode do, so we need a new method to record forward info to trace
* backward when we finish all forward computation. This require our
* AutogradMeta class record following main members
*
4 changes: 0 additions & 4 deletions paddle/fluid/framework/ir/CMakeLists.txt
@@ -429,10 +429,6 @@ if(WITH_MKLDNN)
test_conv_batch_norm_mkldnn_fuse_pass
SRCS mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc
DEPS ${TEST_CONV_BN_PASS_DEPS})
cc_test(
test_scale_matmul_fuse_pass
SRCS mkldnn/scale_matmul_fuse_pass_tester.cc
DEPS scale_matmul_fuse_pass)
cc_test(
test_mkldnn_placement_pass
SRCS mkldnn/mkldnn_placement_pass_tester.cc
6 changes: 5 additions & 1 deletion paddle/fluid/framework/ir/conv_bn_fuse_pass_tester.cc
@@ -32,7 +32,11 @@ void AddVarToScope(Scope* param_scope,
const DDim& dims) {
auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
tensor->Resize(dims);
tensor->mutable_data<float>(platform::CPUPlace());
auto* data = tensor->mutable_data<float>(platform::CPUPlace());
int64_t numel = tensor->numel();
for (int64_t i = 0; i < numel; ++i) {
data[i] = 0;
}
}

Scope* CreateParamScope() {
117 changes: 0 additions & 117 deletions paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc

This file was deleted.

@@ -760,7 +760,7 @@ bool BuildOpFuncList(const platform::Place& place,
new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
phi_kernel_name, phi_cpu_kernel_key)));
if (op_with_kernel->PhiKernel()->IsValid()) {
VLOG(6) << "Static mode PrepareImpl - kernel name: "
VLOG(6) << "Static graph mode PrepareImpl - kernel name: "
<< phi_kernel_name
<< " | kernel key: " << phi_cpu_kernel_key
<< " | kernel: " << *(op_with_kernel->PhiKernel());
34 changes: 25 additions & 9 deletions paddle/fluid/framework/operator.cc
@@ -1603,11 +1603,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
#endif

auto exe_ctx = ExecutionContext(*this, scope, *dev_ctx, *runtime_ctx);
// using cache
if (kernel_type_.get()) {
dev_ctx = pool.Get(kernel_type_->place_);
}
auto exe_ctx = ExecutionContext(*this, scope, *dev_ctx, *runtime_ctx);

// TODO(Liu-xiandong): Now we are using too much if-else and hard code in XPU
// device, it's ugly, and we will refactor in the future.
@@ -1679,12 +1679,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
phi_kernel_name, phi_kernel_key)));

if (phi_kernel_->IsValid()) {
VLOG(6) << "Static mode ChoosePhiKernel - kernel name: "
VLOG(6) << "Static graph mode ChoosePhiKernel - kernel name: "
<< phi_kernel_name << " | kernel key: " << phi_kernel_key
<< " | kernel: " << *phi_kernel_;
} else {
VLOG(6) << "Static mode ChoosePhiKernel - kernel `" << phi_kernel_name
<< "` not found.";
VLOG(6) << "Static graph mode ChoosePhiKernel - kernel `"
<< phi_kernel_name << "` not found.";
}
} else {
phi_kernel_name = kernel_signature_->name;
@@ -1815,7 +1815,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,

dev_ctx = pool.Get(platform::CPUPlace());
if (phi_kernel_->IsValid()) {
VLOG(6) << "Static mode PrepareImpl - kernel name: "
VLOG(6) << "Static graph mode PrepareImpl - kernel name: "
<< phi_kernel_name << " | kernel key: " << phi_cpu_kernel_key
<< " | kernel: " << *phi_kernel_;
run_phi_kernel_ = true;
@@ -2083,11 +2083,11 @@ phi::KernelKey OperatorWithKernel::ChoosePhiKernel(
phi_kernel_name, phi_kernel_key)));

if (phi_kernel_->IsValid()) {
VLOG(6) << "Static mode ChoosePhiKernel - kernel name: " << phi_kernel_name
<< " | kernel key: " << phi_kernel_key
VLOG(6) << "Static graph mode ChoosePhiKernel - kernel name: "
<< phi_kernel_name << " | kernel key: " << phi_kernel_key
<< " | kernel: " << *phi_kernel_;
} else {
VLOG(6) << "Static mode ChoosePhiKernel - kernel `" << phi_kernel_name
VLOG(6) << "Static graph mode ChoosePhiKernel - kernel `" << phi_kernel_name
<< "` not found.";
}
return phi_kernel_key;
@@ -2715,7 +2715,23 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
proto::VarType::Type dafault_data_type =
static_cast<proto::VarType::Type>(-1);
proto::VarType::Type data_type = dafault_data_type;
for (auto* name : ctx.InNameList()) {

auto in_name_list = ctx.InNameList();
if (Info().HasOpProtoAndChecker()) {
for (auto& attr : Info().Proto().attrs()) {
auto it =
std::find_if(in_name_list.begin(),
in_name_list.end(),
[&attr](const std::string* name) {
return attr.support_tensor() && *name == attr.name();
});
if (it != in_name_list.end()) {
in_name_list.erase(it);
}
}
}

for (auto* name : in_name_list) {
if (ctx.InputSize(*name) == 1UL) {
ParseInputDataType(ctx.InputVar(*name), *name, &data_type);
} else {
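The IndicateDataType hunk above excludes inputs that merely mirror an attribute flagged `support_tensor`, so such inputs no longer drive kernel data-type inference. A minimal sketch of the same filtering idea, simplified to plain strings; `AttrMeta` and the sample names are hypothetical stand-ins, not Paddle types:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for one proto attribute entry.
struct AttrMeta {
  std::string name;
  bool support_tensor;  // attribute may also arrive as a tensor input
};

int main() {
  // Inputs the op received; "axis" is really an attribute fed as a tensor.
  std::vector<std::string> in_names = {"X", "Y", "axis"};
  const std::vector<AttrMeta> attrs = {{"axis", true}, {"keepdim", false}};

  // Drop every input whose name matches a tensor-capable attribute,
  // mirroring the std::find_if + erase loop in the diff above.
  for (const auto& attr : attrs) {
    auto it = std::find_if(in_names.begin(), in_names.end(),
                           [&attr](const std::string& name) {
                             return attr.support_tensor && name == attr.name;
                           });
    if (it != in_names.end()) {
      in_names.erase(it);
    }
  }

  for (const auto& name : in_names) {
    std::cout << name << "\n";  // prints X and Y; "axis" is excluded
  }
  return 0;
}
```

The point of the exclusion is presumably that a promoted attribute (say, an integer axis tensor) should not dictate the compute dtype the way a real data input does.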
2 changes: 1 addition & 1 deletion paddle/fluid/imperative/tracer.h
@@ -136,7 +136,7 @@ class Tracer {
}

// Note(Aurelius84): The `tmp` is used as prefix key while naming a temporary
// intermediate var both in imperative and static mode. But the
// intermediate var both in imperative and static graph mode. But the
// `UniqueNameGenerator` in C++ and `unique_name.py` in Python doesn't share
// the same auto-increment id. It will create a variable repeatedly with same
// name like `tmp_0` in some cases when transform dygraph into static layers.
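The note above points at a real pitfall: the C++ `UniqueNameGenerator` and Python's `unique_name.py` each keep their own auto-increment counter, so both sides can hand out the same `tmp`-prefixed name. A tiny illustrative sketch (not Paddle's actual generator) of two independent counters colliding:

```cpp
#include <iostream>
#include <string>

// Minimal stand-in for a per-runtime unique-name generator.
struct UniqueNameGen {
  int id = 0;
  std::string Generate(const std::string& prefix) {
    return prefix + "_" + std::to_string(id++);
  }
};

int main() {
  UniqueNameGen cpp_gen;  // plays the role of the C++ UniqueNameGenerator
  UniqueNameGen py_gen;   // plays the role of Python's unique_name.py

  // Each side starts its counter at 0, so both hand out "tmp_0".
  std::cout << cpp_gen.Generate("tmp") << "\n";  // tmp_0
  std::cout << py_gen.Generate("tmp") << "\n";   // tmp_0 (collision)
  return 0;
}
```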
46 changes: 46 additions & 0 deletions paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
100644 → 100755
@@ -222,6 +222,51 @@ void MakeSimpleReusePlan(
}
}

// Remove the inplace operation from the plan because it does not support memory
// reuse
void DelInplaceOpFromPlan(
Graph* graph,
std::unordered_map<std::string, std::string>* node2cluster,
int sort_kind) {
auto topo_nodes = TopologyVarientSort(
*graph, static_cast<framework::ir::SortKind>(sort_kind));
for (auto* op_node : topo_nodes) {
if (!op_node->IsOp()) continue;
auto input_tensors = op_node->inputs;
auto output_tensors = op_node->outputs;

std::unordered_set<std::string> in_names;
for (const Node* node : input_tensors) {
if (!node->Var()) continue;
if (node->Var()->Persistable()) continue;
std::string var = node->Name();
in_names.insert(var);
}

for (const Node* node : output_tensors) {
if (!node->Var()) continue;
if (node->Var()->Persistable()) continue;
std::string var = node->Name();
if (in_names.find(var) != in_names.end()) {
// delete key
if (node2cluster->count(var)) {
node2cluster->erase(var);
}
// delete value
std::string tmp_name = "";
for (auto it = node2cluster->begin(); it != node2cluster->end(); ++it) {
if (it->second == var) {
if (tmp_name == "") {
tmp_name = it->first;
}
it->second = tmp_name;
}
}
}
}
}
}

// NOTE The optimized opdesc doesn't match ir::Graph.
void UpdateOpDescsByReuse(
Graph* graph,
@@ -324,6 +369,7 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
CollectLifeCycle(graph, &lifecycles, sort_kind);
CollectVarMemorySize(graph, &space_table);
MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size);
DelInplaceOpFromPlan(graph, &node2cluster, sort_kind);

auto* pass_res_info = PassResultInfoForRuntime::Instance();
pass_res_info->Set(
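`DelInplaceOpFromPlan` above drops a variable from the reuse plan when an op reads and writes it under the same non-persistable name: the variable is erased as a key of `node2cluster`, and any surviving members of its cluster are re-pointed at the first remaining key. A toy sketch of just that map fix-up, with illustrative variable names:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

int main() {
  // var -> cluster representative, as MakeSimpleReusePlan might build it.
  std::unordered_map<std::string, std::string> node2cluster = {
      {"a", "a"}, {"b", "a"}, {"c", "a"}};

  // Suppose "a" is both input and output of some op (in-place), so it
  // must not take part in memory reuse.
  const std::string var = "a";

  // Delete "a" as a key, mirroring node2cluster->erase(var).
  node2cluster.erase(var);

  // Re-point every member of the dead cluster at the first survivor found,
  // mirroring the tmp_name loop in the pass.
  std::string tmp_name;
  for (auto& kv : node2cluster) {
    if (kv.second == var) {
      if (tmp_name.empty()) {
        tmp_name = kv.first;
      }
      kv.second = tmp_name;
    }
  }

  // "b" and "c" now share a cluster that no longer involves "a".
  for (const auto& kv : node2cluster) {
    std::cout << kv.first << " -> " << kv.second << "\n";
  }
  return 0;
}
```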
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -2396,6 +2396,7 @@ USE_TRT_CONVERTER(cast)
USE_TRT_CONVERTER(recover_padding)
USE_TRT_CONVERTER(remove_padding)
USE_TRT_CONVERTER(equal);
USE_TRT_CONVERTER(not_equal);
USE_TRT_CONVERTER(top_k)
USE_TRT_CONVERTER(top_k_v2)
USE_TRT_CONVERTER(range)
31 changes: 16 additions & 15 deletions paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc
@@ -1,16 +1,16 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/c_allreduce_op_plugin.h"
@@ -32,8 +32,9 @@ class CAllReduceOpConverter : public OpConverter {
bool test_mode) override {
VLOG(4) << "convert fluid callreduce op to tensorrt layer";
if (!engine_->with_dynamic_shape()) {
PADDLE_THROW(platform::errors::Fatal(
"Unsupported static mode. Please set dynamic shape of inputs."));
PADDLE_THROW(
platform::errors::Fatal("Unsupported static graph mode. Please set "
"dynamic shape of inputs."));
}
ReduceType red_type = op_to_reduce_type[op.type()];
std::string name = op.type();