
Commit f73d6c4 ("fix style")
2 parents: 4308f91 + cffa15c

File tree: 18 files changed, +377 -38 lines

cmake/generic.cmake
Lines changed: 1 addition & 5 deletions

@@ -932,12 +932,8 @@ function(generate_dummy_static_lib)
   if(NOT dummy_GENERATOR)
     message(FATAL_ERROR "You must provide a generator file name.")
   endif()
-  # if ${dummy_GENERATOR} contains "/", it may be a file path
-  if(NOT ${dummy_GENERATOR} MATCHES ".*/.*")
-    set(dummy_GENERATOR "${CMAKE_CURRENT_LIST_DIR}/${dummy_GENERATOR}")
-  endif()
   if(NOT dummy_CONTENT)
-    set(dummy_CONTENT "${dummy_FILE_PATH} for lib ${dummy_LIB_NAME}")
+    set(dummy_CONTENT "${dummy_LIB_NAME}_dummy.c for lib ${dummy_LIB_NAME}")
   endif()
 
   configure_file(${PROJECT_SOURCE_DIR}/cmake/dummy.c.in ${dummy_FILE_PATH} @ONLY)

cmake/unity_build.cmake
Lines changed: 5 additions & 2 deletions

@@ -77,11 +77,14 @@ function(compose_unity_target_sources TARGET TYPE)
   get_property(unity_group_index_max GLOBAL PROPERTY ${TARGET}_${TYPE}_group_index)
   foreach(src ${ARGN})
     set(unity_file "")
-    # UB use absolute path of source.
+    # Note(zhouwei25): UB uses the path relative to CMAKE_SOURCE_DIR.
+    # Using an absolute path would reduce the sccache/ccache hit rate.
    if(IS_ABSOLUTE ${src})
      set(src_absolute_path ${src})
+      file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src})
    else()
      set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
+      file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src_absolute_path})
    endif()
    # If `unity_group_index_max` is empty, there is no combination
    # relationship.
@@ -106,7 +109,7 @@ function(compose_unity_target_sources TARGET TYPE)
        set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} ${UNITY_CU_BEFORE_CODE})
      endif()
    endif()
-    set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_absolute_path}\"")
+    set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_relative_path}\"")
    set(unity_target_sources ${unity_target_sources} ${unity_file})
    break()
  endif()
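
Note on the change above: the generated unity files embed these #include lines, and their bytes feed into the sccache/ccache hash, so machine-specific absolute paths defeat the cache across CI workers. A minimal Python sketch of the idea (the paths below are hypothetical):

import os

CMAKE_SOURCE_DIR = "/workspace/Paddle"  # hypothetical checkout root
src = os.path.join(CMAKE_SOURCE_DIR, "paddle/fluid/memory/malloc.cc")  # hypothetical source

# The relative form is identical for every checkout location, so the
# generated unity file's bytes, and thus the compiler-cache key derived
# from them, stay stable across machines.
rel = os.path.relpath(src, CMAKE_SOURCE_DIR)
print('#include "%s"' % rel)  # -> #include "paddle/fluid/memory/malloc.cc"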

paddle/fluid/distributed/service/communicator.h
Lines changed: 0 additions & 1 deletion

@@ -63,7 +63,6 @@ class BlockingQueue {
  public:
   explicit BlockingQueue(size_t capacity) : capacity_(capacity) {
     PADDLE_ENFORCE_GT(capacity_, 0,
-
                       platform::errors::InvalidArgument(
                           "The capacity must be greater than 0."));
   }

paddle/fluid/inference/api/analysis_predictor.cc
Lines changed: 1 addition & 0 deletions

@@ -1256,6 +1256,7 @@ USE_TRT_CONVERTER(reshape);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
 USE_TRT_CONVERTER(reduce_mean);
+USE_TRT_CONVERTER(tile);
 #endif
 
 namespace paddle_infer {

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
Lines changed: 1 addition & 0 deletions

@@ -15,6 +15,7 @@ nv_library(tensorrt_converter
   reshape_op.cc
   reduce_op.cc
   gather_nd_op.cc
+  tile_op.cc
   DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS

paddle/fluid/inference/tensorrt/convert/tile_op.cc (new file)
Lines changed: 78 additions & 0 deletions

@@ -0,0 +1,78 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * TileOp
+ */
+class TileOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+#if IS_TRT_VERSION_GE(7000)
+    VLOG(4) << "convert a fluid tile op to tensorrt tile layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    nvinfer1::Dims input_shape = input->getDimensions();
+    std::vector<int> repeat_times =
+        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("repeat_times"));
+
+    nvinfer1::Dims output_dim = input_shape;
+    nvinfer1::Dims output_stride;
+    // If input_shape.nbDims + 1 < repeat_times.size(), the op would have
+    // to expand the batch dimension; TRT doesn't support this behavior.
+    PADDLE_ENFORCE_GE(input_shape.nbDims + 1, repeat_times.size(),
+                      platform::errors::InvalidArgument(
+                          "Can't change batchsize, please check repeat_times"));
+    int diff = input_shape.nbDims + 1 - repeat_times.size();
+    if (diff > 0) repeat_times.insert(repeat_times.begin(), diff, 1);
+
+    // Can't expand on batchsize
+    PADDLE_ENFORCE_EQ(
+        repeat_times[0], 1,
+        platform::errors::InvalidArgument(
+            "Can't expand on batchsize, please check repeat_times"));
+    output_stride.nbDims = input_shape.nbDims;
+    for (int i = 0; i < input_shape.nbDims; i++) {
+      output_dim.d[i] = output_dim.d[i] * repeat_times[i + 1];
+      output_stride.d[i] = 1;
+    }
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, input_shape,
+                                       output_dim, output_stride);
+    layer->setMode(nvinfer1::SliceMode::kWRAP);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "tile", {output_name}, test_mode);
+#endif
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(tile, TileOpConverter);
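
The shape arithmetic in this converter is easy to check by hand: repeat_times is left-padded with 1s to cover the implicit batch dimension, the batch repeat must stay 1, and a Slice layer in SliceMode::kWRAP then reads the input cyclically up to the tiled extents. A minimal Python sketch of the dimension computation (the helper is illustrative, not Paddle API):

def tile_output_dims(input_shape, repeat_times):
    # input_shape excludes the implicit batch dimension, matching the
    # implicit-batch nvinfer1::Dims the converter works with.
    if len(input_shape) + 1 < len(repeat_times):
        raise ValueError("Can't change batchsize, please check repeat_times")
    diff = len(input_shape) + 1 - len(repeat_times)
    repeat_times = [1] * diff + list(repeat_times)
    if repeat_times[0] != 1:
        raise ValueError("Can't expand on batchsize, please check repeat_times")
    # With kWRAP the Slice layer wraps around the input, so an output
    # extent of d * r repeats the axis contents r times.
    return [d * r for d, r in zip(input_shape, repeat_times[1:])]

print(tile_output_dims([3, 4], [1, 2, 3]))  # -> [6, 12]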

paddle/fluid/inference/tensorrt/op_teller.cc
Lines changed: 21 additions & 1 deletion

@@ -51,6 +51,9 @@ struct SimpleOpTypeSetTeller : public Teller {
 #if IS_TRT_VERSION_GE(7130)
     teller_set.insert("group_norm");
 #endif
+#if IS_TRT_VERSION_GE(7000)
+    teller_set.insert("tile");
+#endif
 #if CUDA_VERSION >= 10020
     teller_set.insert("reshape");
     teller_set.insert("reshape2");
@@ -716,19 +719,36 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
        VLOG(3) << "the " << op_type
                << " does not have attr (keep_dim or dim or "
                   "reduce_all)";
+        std::cout << "attr " << desc.HasAttr("keep_dim") << " "
+                  << desc.HasAttr("dim") << " " << desc.HasAttr("reduce_all");
        return false;
      }
 
      // The batch size dimension cannot be reduced if it's not dynamic shape.
      if (!with_dynamic_shape) {
-        if (desc.HasAttr("reduce_all")) return false;
+        if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all"))) return false;
        std::vector<int32_t> dim =
            BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
        for (auto x : dim) {
          if (!x) return false;
        }
      }
    }
+#if IS_TRT_VERSION_GE(7000)
+    if (op_type == "tile") {
+      // Paddle-TRT does not support tensor-valued repeat_times inputs.
+      auto inputs = desc.InputArgumentNames();
+      for (auto& input : inputs) {
+        if (input == "repeat_times_tensor" &&
+            desc.Input("repeat_times_tensor").size() > 0)
+          return false;
+        if (input == "RepeatTimes" && desc.Input("RepeatTimes").size() > 0)
+          return false;
+      }
+      if (with_dynamic_shape) return false;
+      if (!with_dynamic_shape && !desc.HasAttr("repeat_times")) return false;
+    }
+#endif
 
    if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
  }
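
Net effect of the teller change: tile is only offered to TensorRT when its repeat counts are compile-time constants. Tensor-valued inputs (repeat_times_tensor, RepeatTimes) and dynamic-shape mode are rejected, and the repeat_times attribute must be present. A Python paraphrase of the predicate (the op_desc helpers are hypothetical):

def tile_convertible(op_desc, with_dynamic_shape):
    # Reject tensor-valued repeat counts; the converter reads the
    # repeat_times attribute at engine-build time.
    if op_desc.inputs.get("repeat_times_tensor"):
        return False
    if op_desc.inputs.get("RepeatTimes"):
        return False
    # Static-shape engines only, and the attribute must exist.
    if with_dynamic_shape:
        return False
    return "repeat_times" in op_desc.attrs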

paddle/scripts/paddle_build.bat
Lines changed: 2 additions & 3 deletions

@@ -154,8 +154,10 @@ dir %cache_dir%
 dir paddle\fluid\pybind\Release
 rem -------Caching strategy 1: End --------------------------------
 
+
 rem -------Caching strategy 2: sccache decorate compiler-----------
 if "%WITH_SCCACHE%"=="ON" (
+    del D:\sccache\sccache_log.txt
     cmd /C sccache -V || call :install_sccache
     sccache --stop-server 2> NUL
     if not exist D:\sccache mkdir D:\sccache
@@ -324,17 +326,14 @@ if %day_now% NEQ %day_before% (
     echo %day_now% > %cache_dir%\day.txt
     type %cache_dir%\day.txt
     if %day_now% EQU 21 (
-        del D:\sccache\sccache_log.txt
         rmdir %cache_dir%\third_party_GPU /s/q
         rmdir %cache_dir%\third_party /s/q
     )
     if %day_now% EQU 11 (
-        del D:\sccache\sccache_log.txt
         rmdir %cache_dir%\third_party_GPU /s/q
         rmdir %cache_dir%\third_party /s/q
     )
     if %day_now% EQU 01 (
-        del D:\sccache\sccache_log.txt
         rmdir %cache_dir%\third_party_GPU /s/q
         rmdir %cache_dir%\third_party /s/q
     )

python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
Lines changed: 13 additions & 5 deletions

@@ -131,12 +131,16 @@ class PartialProgramLayer:
         Layer: A Layer object that run all ops internally in static mode.
     """
 
-    def __init__(self, main_program, inputs, outputs, parameters=None):
+    def __init__(self, main_program, inputs, outputs, parameters=None,
+                 **kwargs):
         super(PartialProgramLayer, self).__init__()
         self._inputs = NestSequence(inputs)
         self._outputs = NestSequence(outputs, need_check=True)
         self._params = parameters if parameters is not None else []
 
+        self._build_strategy = kwargs.get('build_strategy', BuildStrategy())
+        assert isinstance(self._build_strategy, BuildStrategy)
+
         self._origin_main_program = self._verify_program(main_program)
         self._tmp_scope_vec = self._create_scope_vec()
         # A fake_var to handle empty input or output
@@ -170,7 +174,11 @@ def _infer_program_id(self):
 
     @LazyInitialized
     def _train_program_id(self):
-        return _hash_with_id(self._train_program, self)
+        program_id = _hash_with_id(self._train_program, self)
+        core._set_cached_executor_build_strategy(program_id,
+                                                 self._build_strategy)
+
+        return program_id
 
     def _verify_program(self, main_program):
         """
@@ -451,6 +459,6 @@ def partial_program_from(concrete_program):
     if inputs and isinstance(inputs[0], layers.Layer):
         inputs = inputs[1:]
 
-    return PartialProgramLayer(concrete_program.main_program, inputs,
-                               concrete_program.outputs,
-                               concrete_program.parameters)
+    return PartialProgramLayer(
+        concrete_program.main_program, inputs, concrete_program.outputs,
+        concrete_program.parameters, **concrete_program.kwargs)
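
With this change a BuildStrategy can be threaded from the dygraph-to-static entry point down to the cached executor, keyed by the hashed program id. A hypothetical call site, assuming the public paddle.static.BuildStrategy and a concrete_program that carries the kwargs (the flag shown is just an example):

import paddle

build_strategy = paddle.static.BuildStrategy()
build_strategy.fuse_elewise_add_act_ops = True  # example optimization flag

# Forwarded through **kwargs and picked up by
# kwargs.get('build_strategy', BuildStrategy()) in __init__.
layer = PartialProgramLayer(
    concrete_program.main_program, inputs, concrete_program.outputs,
    concrete_program.parameters, build_strategy=build_strategy)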
