
Commit f73d6c4 ("fix style")
2 parents: 4308f91 + cffa15c

File tree: 18 files changed, +377 -38 lines

cmake/generic.cmake
Lines changed: 1 addition & 5 deletions

@@ -932,12 +932,8 @@ function(generate_dummy_static_lib)
   if(NOT dummy_GENERATOR)
     message(FATAL_ERROR "You must provide a generator file name.")
   endif()
-  # if ${dummy_GENERATOR} contains "/", it may be a file path
-  if(NOT ${dummy_GENERATOR} MATCHES ".*/.*")
-    set(dummy_GENERATOR "${CMAKE_CURRENT_LIST_DIR}/${dummy_GENERATOR}")
-  endif()
   if(NOT dummy_CONTENT)
-    set(dummy_CONTENT "${dummy_FILE_PATH} for lib ${dummy_LIB_NAME}")
+    set(dummy_CONTENT "${dummy_LIB_NAME}_dummy.c for lib ${dummy_LIB_NAME}")
   endif()
 
   configure_file(${PROJECT_SOURCE_DIR}/cmake/dummy.c.in ${dummy_FILE_PATH} @ONLY)

cmake/unity_build.cmake
Lines changed: 5 additions & 2 deletions

@@ -77,11 +77,14 @@ function(compose_unity_target_sources TARGET TYPE)
   get_property(unity_group_index_max GLOBAL PROPERTY ${TARGET}_${TYPE}_group_index)
   foreach(src ${ARGN})
     set(unity_file "")
-    # UB use absolute path of source.
+    # Note(zhouwei25): UB uses the path relative to CMAKE_SOURCE_DIR.
+    # Using an absolute path would reduce the sccache/ccache hit rate.
    if(IS_ABSOLUTE ${src})
      set(src_absolute_path ${src})
+      file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src})
    else()
      set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
+      file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src_absolute_path})
    endif()
    # If `unity_group_index_max` is empty, there is no combination
    # relationship.
@@ -106,7 +109,7 @@ function(compose_unity_target_sources TARGET TYPE)
        set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} ${UNITY_CU_BEFORE_CODE})
      endif()
    endif()
-    set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_absolute_path}\"")
+    set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_relative_path}\"")
    set(unity_target_sources ${unity_target_sources} ${unity_file})
    break()
  endif()
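
Note on the change above: the generated unity files embed these #include lines, and their bytes feed into the sccache/ccache hash, so machine-specific absolute paths defeat the cache across CI workers. A minimal Python sketch of the idea (the paths below are hypothetical):

import os

CMAKE_SOURCE_DIR = "/workspace/Paddle"  # hypothetical checkout root
src = os.path.join(CMAKE_SOURCE_DIR, "paddle/fluid/memory/malloc.cc")  # hypothetical source

# The relative form is identical for every checkout location, so the
# generated unity file's bytes, and thus the compiler-cache key derived
# from them, stay stable across machines.
rel = os.path.relpath(src, CMAKE_SOURCE_DIR)
print('#include "%s"' % rel)  # -> #include "paddle/fluid/memory/malloc.cc"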

paddle/fluid/distributed/service/communicator.h
Lines changed: 0 additions & 1 deletion

@@ -63,7 +63,6 @@ class BlockingQueue {
  public:
   explicit BlockingQueue(size_t capacity) : capacity_(capacity) {
     PADDLE_ENFORCE_GT(capacity_, 0,
-
                       platform::errors::InvalidArgument(
                           "The capacity must be greater than 0."));
   }

paddle/fluid/inference/api/analysis_predictor.cc
Lines changed: 1 addition & 0 deletions

@@ -1256,6 +1256,7 @@ USE_TRT_CONVERTER(reshape);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
 USE_TRT_CONVERTER(reduce_mean);
+USE_TRT_CONVERTER(tile);
 #endif
 
 namespace paddle_infer {

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
Lines changed: 1 addition & 0 deletions

@@ -15,6 +15,7 @@ nv_library(tensorrt_converter
   reshape_op.cc
   reduce_op.cc
   gather_nd_op.cc
+  tile_op.cc
   DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS

paddle/fluid/inference/tensorrt/convert/tile_op.cc (new file)
Lines changed: 78 additions & 0 deletions

@@ -0,0 +1,78 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * TileOp
+ */
+class TileOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+#if IS_TRT_VERSION_GE(7000)
+    VLOG(4) << "convert a fluid tile op to tensorrt tile layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    nvinfer1::Dims input_shape = input->getDimensions();
+    std::vector<int> repeat_times =
+        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("repeat_times"));
+
+    nvinfer1::Dims output_dim = input_shape;
+    nvinfer1::Dims output_stride;
+    // If input_shape.nbDims + 1 < repeat_times.size(), the op would have
+    // to expand the batch dimension; TRT doesn't support this behavior.
+    PADDLE_ENFORCE_GE(input_shape.nbDims + 1, repeat_times.size(),
+                      platform::errors::InvalidArgument(
+                          "Can't change batchsize, please check repeat_times"));
+    int diff = input_shape.nbDims + 1 - repeat_times.size();
+    if (diff > 0) repeat_times.insert(repeat_times.begin(), diff, 1);
+
+    // Can't expand on batchsize
+    PADDLE_ENFORCE_EQ(
+        repeat_times[0], 1,
+        platform::errors::InvalidArgument(
+            "Can't expand on batchsize, please check repeat_times"));
+    output_stride.nbDims = input_shape.nbDims;
+    for (int i = 0; i < input_shape.nbDims; i++) {
+      output_dim.d[i] = output_dim.d[i] * repeat_times[i + 1];
+      output_stride.d[i] = 1;
+    }
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, input_shape,
+                                       output_dim, output_stride);
+    layer->setMode(nvinfer1::SliceMode::kWRAP);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "tile", {output_name}, test_mode);
+#endif
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(tile, TileOpConverter);
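
The shape arithmetic in this converter is easy to check by hand: repeat_times is left-padded with 1s to cover the implicit batch dimension, the batch repeat must stay 1, and a Slice layer in SliceMode::kWRAP then reads the input cyclically up to the tiled extents. A minimal Python sketch of the dimension computation (the helper is illustrative, not Paddle API):

def tile_output_dims(input_shape, repeat_times):
    # input_shape excludes the implicit batch dimension, matching the
    # implicit-batch nvinfer1::Dims the converter works with.
    if len(input_shape) + 1 < len(repeat_times):
        raise ValueError("Can't change batchsize, please check repeat_times")
    diff = len(input_shape) + 1 - len(repeat_times)
    repeat_times = [1] * diff + list(repeat_times)
    if repeat_times[0] != 1:
        raise ValueError("Can't expand on batchsize, please check repeat_times")
    # With kWRAP the Slice layer wraps around the input, so an output
    # extent of d * r repeats the axis contents r times.
    return [d * r for d, r in zip(input_shape, repeat_times[1:])]

print(tile_output_dims([3, 4], [1, 2, 3]))  # -> [6, 12]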

paddle/fluid/inference/tensorrt/op_teller.cc
Lines changed: 21 additions & 1 deletion

@@ -51,6 +51,9 @@ struct SimpleOpTypeSetTeller : public Teller {
 #if IS_TRT_VERSION_GE(7130)
     teller_set.insert("group_norm");
 #endif
+#if IS_TRT_VERSION_GE(7000)
+    teller_set.insert("tile");
+#endif
 #if CUDA_VERSION >= 10020
     teller_set.insert("reshape");
     teller_set.insert("reshape2");
@@ -716,19 +719,36 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
        VLOG(3) << "the " << op_type
                << " does not have attr (keep_dim or dim or "
                   "reduce_all)";
+        std::cout << "attr " << desc.HasAttr("keep_dim") << " "
+                  << desc.HasAttr("dim") << " " << desc.HasAttr("reduce_all");
        return false;
      }
 
      // The batch size dimension cannot be reduced if it's not dynamic shape.
      if (!with_dynamic_shape) {
-        if (desc.HasAttr("reduce_all")) return false;
+        if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all"))) return false;
        std::vector<int32_t> dim =
            BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
        for (auto x : dim) {
          if (!x) return false;
        }
      }
    }
+#if IS_TRT_VERSION_GE(7000)
+    if (op_type == "tile") {
+      // Paddle-TRT does not support tensor-valued repeat_times inputs.
+      auto inputs = desc.InputArgumentNames();
+      for (auto& input : inputs) {
+        if (input == "repeat_times_tensor" &&
+            desc.Input("repeat_times_tensor").size() > 0)
+          return false;
+        if (input == "RepeatTimes" && desc.Input("RepeatTimes").size() > 0)
+          return false;
+      }
+      if (with_dynamic_shape) return false;
+      if (!with_dynamic_shape && !desc.HasAttr("repeat_times")) return false;
+    }
+#endif
 
    if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
  }
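
Net effect of the teller change: tile is only offered to TensorRT when its repeat counts are compile-time constants. Tensor-valued inputs (repeat_times_tensor, RepeatTimes) and dynamic-shape mode are rejected, and the repeat_times attribute must be present. A Python paraphrase of the predicate (the op_desc helpers are hypothetical):

def tile_convertible(op_desc, with_dynamic_shape):
    # Reject tensor-valued repeat counts; the converter reads the
    # repeat_times attribute at engine-build time.
    if op_desc.inputs.get("repeat_times_tensor"):
        return False
    if op_desc.inputs.get("RepeatTimes"):
        return False
    # Static-shape engines only, and the attribute must exist.
    if with_dynamic_shape:
        return False
    return "repeat_times" in op_desc.attrs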

paddle/scripts/paddle_build.bat
Lines changed: 2 additions & 3 deletions

@@ -154,8 +154,10 @@ dir %cache_dir%
 dir paddle\fluid\pybind\Release
 rem -------Caching strategy 1: End --------------------------------
 
+
 rem -------Caching strategy 2: sccache decorate compiler-----------
 if "%WITH_SCCACHE%"=="ON" (
+    del D:\sccache\sccache_log.txt
     cmd /C sccache -V || call :install_sccache
     sccache --stop-server 2> NUL
     if not exist D:\sccache mkdir D:\sccache
@@ -324,17 +326,14 @@ if %day_now% NEQ %day_before% (
     echo %day_now% > %cache_dir%\day.txt
     type %cache_dir%\day.txt
     if %day_now% EQU 21 (
-        del D:\sccache\sccache_log.txt
         rmdir %cache_dir%\third_party_GPU /s/q
         rmdir %cache_dir%\third_party /s/q
     )
     if %day_now% EQU 11 (
-        del D:\sccache\sccache_log.txt
         rmdir %cache_dir%\third_party_GPU /s/q
         rmdir %cache_dir%\third_party /s/q
     )
     if %day_now% EQU 01 (
-        del D:\sccache\sccache_log.txt
         rmdir %cache_dir%\third_party_GPU /s/q
         rmdir %cache_dir%\third_party /s/q
     )

python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
Lines changed: 13 additions & 5 deletions

@@ -131,12 +131,16 @@ class PartialProgramLayer:
         Layer: A Layer object that run all ops internally in static mode.
     """
 
-    def __init__(self, main_program, inputs, outputs, parameters=None):
+    def __init__(self, main_program, inputs, outputs, parameters=None,
+                 **kwargs):
         super(PartialProgramLayer, self).__init__()
         self._inputs = NestSequence(inputs)
         self._outputs = NestSequence(outputs, need_check=True)
         self._params = parameters if parameters is not None else []
 
+        self._build_strategy = kwargs.get('build_strategy', BuildStrategy())
+        assert isinstance(self._build_strategy, BuildStrategy)
+
         self._origin_main_program = self._verify_program(main_program)
         self._tmp_scope_vec = self._create_scope_vec()
         # A fake_var to handle empty input or output
@@ -170,7 +174,11 @@ def _infer_program_id(self):
 
     @LazyInitialized
     def _train_program_id(self):
-        return _hash_with_id(self._train_program, self)
+        program_id = _hash_with_id(self._train_program, self)
+        core._set_cached_executor_build_strategy(program_id,
+                                                 self._build_strategy)
+
+        return program_id
 
     def _verify_program(self, main_program):
         """
@@ -451,6 +459,6 @@ def partial_program_from(concrete_program):
     if inputs and isinstance(inputs[0], layers.Layer):
         inputs = inputs[1:]
 
-    return PartialProgramLayer(concrete_program.main_program, inputs,
-                               concrete_program.outputs,
-                               concrete_program.parameters)
+    return PartialProgramLayer(
+        concrete_program.main_program, inputs, concrete_program.outputs,
+        concrete_program.parameters, **concrete_program.kwargs)
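
With this change a BuildStrategy can be threaded from the dygraph-to-static entry point down to the cached executor, keyed by the hashed program id. A hypothetical call site, assuming the public paddle.static.BuildStrategy and a concrete_program that carries the kwargs (the flag shown is just an example):

import paddle

build_strategy = paddle.static.BuildStrategy()
build_strategy.fuse_elewise_add_act_ops = True  # example optimization flag

# Forwarded through **kwargs and picked up by
# kwargs.get('build_strategy', BuildStrategy()) in __init__.
layer = PartialProgramLayer(
    concrete_program.main_program, inputs, concrete_program.outputs,
    concrete_program.parameters, build_strategy=build_strategy)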
