8 changes: 7 additions & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
@@ -591,7 +591,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
gflags.push_back("--allocator_strategy=thread_local");
process_level_allocator_enabled = false;
} else {
gflags.push_back("--allocator_strategy=naive_best_fit");
process_level_allocator_enabled = true;
}

@@ -890,6 +889,11 @@ bool AnalysisPredictor::LoadParameters() {
return true;
}

void AnalysisPredictor::ShrinkMemory() {
ClearIntermediateTensor();
paddle::memory::Release(place_);
}

void AnalysisPredictor::ClearIntermediateTensor() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
platform::errors::PreconditionNotMet(
@@ -1142,6 +1146,8 @@ void Predictor::ClearIntermediateTensor() {
predictor_->ClearIntermediateTensor();
}

void Predictor::ShrinkMemory() { predictor_->ShrinkMemory(); }

int GetNumBytesOfDataType(DataType dtype) {
switch (dtype) {
case DataType::FLOAT32:
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -193,6 +193,12 @@ class AnalysisPredictor : public PaddlePredictor {
///
void ClearIntermediateTensor();

///
/// \brief Release all temporary tensors to shrink the size of the memory pool.
/// After this operation, the memory usage of the process is reduced.
///
void ShrinkMemory() override;

///
/// \brief Get the argument used by predictor
///
43 changes: 43 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -135,6 +135,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
auto* out_data = out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
LOG(INFO) << "output_data: " << out_data;
predictor->ShrinkMemory();
}

TEST(AnalysisPredictor, Clone) {
@@ -507,3 +508,45 @@ TEST(AnalysisPredictor, bf16_pass_strategy) {
}

} // namespace paddle

namespace paddle_infer {

TEST(Predictor, Run) {
Config config;
config.SetModel(FLAGS_dirname);

auto predictor = CreatePredictor(config);

auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
auto w2 = predictor->GetInputHandle("thirdw");
auto w3 = predictor->GetInputHandle("forthw");

w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});

auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);

for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}

predictor->Run();

auto out = predictor->GetOutputHandle("fc_1.tmp_2");
PlaceType place;
int size = 0;
out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
predictor->ShrinkMemory();
}

} // namespace paddle_infer
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/api_tester.cc
@@ -60,6 +60,7 @@ TEST(paddle_inference_api, demo) {
auto predictor = CreatePaddlePredictor(config);
std::vector<PaddleTensor> outputs;
predictor->Run({}, &outputs);
predictor->ShrinkMemory();
}

TEST(paddle_inference_api, get_version) {
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/paddle_api.h
@@ -319,6 +319,12 @@ class PD_INFER_DECL PaddlePredictor {
///
virtual void ClearIntermediateTensor() {}

///
/// \brief Release all temporary tensors to shrink the size of the memory pool.
/// After this operation, the memory usage of the process is reduced.
///
virtual void ShrinkMemory() {}

/// \brief Clone an existing predictor
/// When using clone, the same network will be created,
/// and the parameters between them are shared.
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/paddle_inference_api.h
@@ -224,6 +224,12 @@ class PD_INFER_DECL Predictor {
/// \brief Clear the intermediate tensors of the predictor
void ClearIntermediateTensor();

///
/// \brief Release all temporary tensors to shrink the size of the memory pool.
/// After this operation, the memory usage of the process is reduced.
///
void ShrinkMemory();

private:
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
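For context, a minimal sketch of how the new C++ API is intended to be driven once this change lands; the include path, model directory, and the assumption that outputs are copied out before shrinking are placeholders mirroring the test above, not a prescribed setup:

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./word2vec_model");  // placeholder model directory

  auto predictor = paddle_infer::CreatePredictor(config);

  // ... fill inputs via GetInputHandle() and run inference ...
  predictor->Run();

  // Once the outputs have been copied out, drop intermediate tensors and
  // return the freed memory-pool chunks to the system.
  predictor->ShrinkMemory();
  return 0;
}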
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -566,6 +566,7 @@ void BindAnalysisPredictor(py::module *m) {
.def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
.def("clear_intermediate_tensor",
&AnalysisPredictor::ClearIntermediateTensor)
.def("shrink_memory", &AnalysisPredictor::ShrinkMemory)
.def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar)
.def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch)
.def("prepare_argument", &AnalysisPredictor::PrepareArgument)
@@ -593,6 +594,7 @@ void BindPaddleInferPredictor(py::module *m) {
.def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
.def("run", &paddle_infer::Predictor::Run)
.def("clone", &paddle_infer::Predictor::Clone)
.def("shrink_memory", &paddle_infer::Predictor::ShrinkMemory)
.def("clear_intermediate_tensor",
&paddle_infer::Predictor::ClearIntermediateTensor);
}