Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
RUN wget -qO- http://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
tar -xz -C /usr/local && \

RUN wget -q https://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.1.6-ubuntu14.04.x86_64-gnu.cuda.8.0.cudnn7.0.tar.gz --no-check-certificate && \
tar -zxf TensorRT-4.0.1.6-ubuntu14.04.x86_64-gnu.cuda.8.0.cudnn7.0.tar.gz -C /usr/local && \
cp -rf /usr/local/TensorRT/include /usr && \
cp -rf /usr/local/TensorRT/lib /usr

Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/framework/ir/fuse_pass_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#pragma once

#include <string>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
Expand All @@ -24,6 +25,10 @@ namespace ir {

static const char kParamScopeAttr[] = "__param_scope__";
static const char kFuseStatisAttr[] = "__fuse_statis__";
// When we use trt or other third_party lib, the parameters are managed by
// the lib, but not the fluid. So we need to record them to avoid duplicate
// allocation.
static const char kRepetitiveParamAttr[] = "__repetitive_param__";

enum FuseOptions {
DO_NOT_FUSE, // fusing will not be done
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
Expand Down Expand Up @@ -133,6 +137,8 @@ struct Argument {
DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode,
AnalysisConfig::Precision);
DECL_ARGUMENT_FIELD(tensorrt_use_static_engine, TensorRtUseStaticEngine,
bool);

// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
Expand Down
31 changes: 31 additions & 0 deletions paddle/fluid/inference/analysis/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ limitations under the License. */
#include <sys/stat.h>
#include <cstdio>
#include <fstream>
#include <memory>
#include <set>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/framework.pb.h"
Expand Down Expand Up @@ -217,6 +219,35 @@ static std::string GetTrtCalibTableData(const std::string &model_opt_cache_dir,
return "";
}

static std::string GetTrtEngineSerializedPath(const std::string &model_root,
const std::string &engine_key) {
return model_root + "/trt_serialized_" + engine_key;
}

static std::string GetTrtEngineSerializedData(
const std::string &model_opt_cache_dir, const std::string &engine_key) {
std::string trt_serialized_path =
GetTrtEngineSerializedPath(model_opt_cache_dir, engine_key);
if (FileExists(trt_serialized_path)) {
VLOG(3) << "Trt serialized file: " << trt_serialized_path
<< "is found here";
std::ifstream infile(trt_serialized_path, std::ios::in);
std::stringstream buffer;
buffer << infile.rdbuf();
std::string trt_engine_serialized_data(buffer.str());
return trt_engine_serialized_data;
}
return "";
}

static void SaveTrtEngineSerializedDataToFile(
const std::string &trt_serialized_path,
const std::string &engine_serialized_data) {
std::ofstream outfile(trt_serialized_path);
outfile << engine_serialized_data;
outfile.close();
}

} // namespace analysis
} // namespace inference
} // namespace paddle
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set(
"model_opt_cache_dir",
new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
pass->Set("use_static_engine",
new bool(argument->tensorrt_use_static_engine()));
}

pre_pass = pass_name;
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@

#pragma once

#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
Expand Down
Loading