Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions paddle/fluid/inference/anakin/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,26 @@ namespace paddle {
namespace inference {
namespace anakin {

// Declaration of the static once-flag of AnakinEngine. The constructor passes
// it to std::call_once so that the Anakin device/environment setup runs only
// once for each <TargetT, PrecisionType, RunType> specialization.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
extern std::once_flag
AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;

// Constructs an engine bound to `device`.
//
// Parameters:
//   need_summary       - forwarded unchanged to the AnakinNetT constructor.
//   device             - device id stored in device_ and passed to
//                        TargetWrapper<TargetT>::set_device().
//   max_batch_size     - stored in max_batch_size_.
//   max_input_shape    - stored in max_input_shape_.
//   program_inputs     - stored in program_inputs_.
//   auto_config_layout - stored in auto_config_layout_.
//
// The members are set in the initializer list (in the same order the header
// declares them) so that device_ already holds its value when the one-time
// initialization lambda reads it.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
    bool need_summary, int device, int max_batch_size,
    std::map<std::string, std::vector<int>> max_input_shape,
    std::vector<std::string> program_inputs, bool auto_config_layout)
    : device_(device),
      max_batch_size_(max_batch_size),
      max_input_shape_(max_input_shape),
      program_inputs_(program_inputs),
      auto_config_layout_(auto_config_layout) {
  // init_anakin_ is a static flag, so this lambda runs only for the first
  // engine constructed for this specialization; later engines skip it, which
  // means set_device() only ever sees the first engine's device_.
  std::call_once(init_anakin_, [this]() {
    ::anakin::TargetWrapper<TargetT>::set_device(device_);
    ::anakin::Env<TargetT>::env_init();
  });
  // The graph and the net are created only after the one-time setup above.
  graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
  net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
}

template <typename TargetT, Precision PrecisionType, OpRunType RunType>
Expand Down Expand Up @@ -102,7 +110,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
anakin_input = net_->get_in(input.first);
}
anakin_input->reshape(fluid_input_shape);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
fluid_input_shape);
anakin_input->copy_from(tmp_anakin_tensor);
}
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/inference/anakin/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,13 @@ class AnakinEngine {

private:
bool initialized_{false};
int device_;
int max_batch_size_;
std::map<std::string, std::vector<int>> max_input_shape_;
int device_;
std::vector<std::string> program_inputs_;
std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_;
std::vector<std::string> program_inputs_;
static std::once_flag init_anakin_;
std::unordered_map<std::string, float> tensor_scales_;
// Always false in GPU mode, but true in most CPU cases.
bool auto_config_layout_;
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
// Get the GPU memory details and calculate the fraction of memory for the
// GPU memory pool.
size_t gpu_used, gpu_available;
platform::SetDeviceId(device_id_);
platform::GpuMemoryUsage(&gpu_used, &gpu_available);
double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
float fraction_of_gpu_memory =
Expand Down