Skip to content

Commit 7cb44d9

Browse files
committed
update.
1 parent 95b7bb4 commit 7cb44d9

File tree

7 files changed

+31
-14
lines changed

7 files changed

+31
-14
lines changed

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -889,9 +889,9 @@ bool AnalysisPredictor::LoadParameters() {
889 889
return true;
890 890
}
891 891

892 -
void AnalysisPredictor::ShrinkMemory() {
892 +
uint64_t AnalysisPredictor::TryShrinkMemory() {
893 893
ClearIntermediateTensor();
894 -
paddle::memory::Release(place_);
894 +
return paddle::memory::Release(place_);
895 895
}
896 896

897 897
void AnalysisPredictor::ClearIntermediateTensor() {
@@ -982,6 +982,8 @@ AnalysisPredictor::~AnalysisPredictor() {
982 982
if (sub_scope_) {
983 983
scope_->DeleteScope(sub_scope_);
984 984
}
985 +
// TODO(wilber): release weight memory.
986 +
paddle::memory::Release(place_);
985 987

986 988
#if PADDLE_WITH_MKLDNN
987 989
if (mkldnn_quantizer_) {
@@ -1146,7 +1148,7 @@ void Predictor::ClearIntermediateTensor() {
1146 1148
predictor_->ClearIntermediateTensor();
1147 1149
}
1148 1150

1149 -
void Predictor::ShrinkMemory() { predictor_->ShrinkMemory(); }
1151 +
uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
1150 1152

1151 1153
int GetNumBytesOfDataType(DataType dtype) {
1152 1154
switch (dtype) {

paddle/fluid/inference/api/analysis_predictor.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,14 @@ class AnalysisPredictor : public PaddlePredictor {
195 195

196 196
///
197 197
/// \brief Release all tmp tensor to compress the size of the memory pool.
198 -
/// After this operation, we reduced the memory usage.
198 +
/// The memory pool is considered to be composed of a list of chunks, if
199 +
/// the chunk is not occupied, it can be released.
199 200
///
200 -
void ShrinkMemory() override;
201 +
/// \return Number of bytes released. It may be smaller than the actual
202 +
/// released memory, because part of the memory is not managed by the
203 +
/// MemoryPool.
204 +
///
205 +
uint64_t TryShrinkMemory() override;
201 206

202 207
///
203 208
/// \brief Get the argument used by predictor

paddle/fluid/inference/api/analysis_predictor_tester.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
135 135
auto* out_data = out->data<float>(&place, &size);
136 136
LOG(INFO) << "output size: " << size / sizeof(float);
137 137
LOG(INFO) << "output_data: " << out_data;
138 -
predictor->ShrinkMemory();
138 +
predictor->TryShrinkMemory();
139 139
}
140 140

141 141
TEST(AnalysisPredictor, Clone) {
@@ -546,7 +546,7 @@ TEST(Predictor, Run) {
546 546
int size = 0;
547 547
out->data<float>(&place, &size);
548 548
LOG(INFO) << "output size: " << size / sizeof(float);
549 -
predictor->ShrinkMemory();
549 +
predictor->TryShrinkMemory();
550 550
}
551 551

552 552
} // namespace paddle_infer

paddle/fluid/inference/api/api_tester.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ TEST(paddle_inference_api, demo) {
60 60
auto predictor = CreatePaddlePredictor(config);
61 61
std::vector<PaddleTensor> outputs;
62 62
predictor->Run({}, &outputs);
63 -
predictor->ShrinkMemory();
63 +
predictor->TryShrinkMemory();
64 64
}
65 65

66 66
TEST(paddle_inference_api, get_version) {

paddle/fluid/inference/api/paddle_api.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,14 @@ class PD_INFER_DECL PaddlePredictor {
321 321

322 322
///
323 323
/// \brief Release all tmp tensor to compress the size of the memory pool.
324 -
/// After this operation, we reduced the memory usage.
324 +
/// The memory pool is considered to be composed of a list of chunks, if
325 +
/// the chunk is not occupied, it can be released.
325 326
///
326 -
virtual void ShrinkMemory() {}
327 +
/// \return Number of bytes released. It may be smaller than the actual
328 +
/// released memory, because part of the memory is not managed by the
329 +
/// MemoryPool.
330 +
///
331 +
virtual uint64_t TryShrinkMemory() { return 0; }
327 332

328 333
/// \brief Clone an existing predictor
329 334
/// When using clone, the same network will be created,

paddle/fluid/inference/api/paddle_inference_api.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,14 @@ class PD_INFER_DECL Predictor {
226 226

227 227
///
228 228
/// \brief Release all tmp tensor to compress the size of the memory pool.
229 -
/// After this operation, we reduced the memory usage.
229 +
/// The memory pool is considered to be composed of a list of chunks, if
230 +
/// the chunk is not occupied, it can be released.
230 231
///
231 -
void ShrinkMemory();
232 +
/// \return Number of bytes released. It may be smaller than the actual
233 +
/// released memory, because part of the memory is not managed by the
234 +
/// MemoryPool.
235 +
///
236 +
uint64_t TryShrinkMemory();
232 237

233 238
private:
234 239
std::unique_ptr<paddle::PaddlePredictor> predictor_;

paddle/fluid/pybind/inference_api.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ void BindAnalysisPredictor(py::module *m) {
566 566
.def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
567 567
.def("clear_intermediate_tensor",
568 568
&AnalysisPredictor::ClearIntermediateTensor)
569 -
.def("shrink_memory", &AnalysisPredictor::ShrinkMemory)
569 +
.def("try_shrink_memory", &AnalysisPredictor::TryShrinkMemory)
570 570
.def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar)
571 571
.def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch)
572 572
.def("prepare_argument", &AnalysisPredictor::PrepareArgument)
@@ -594,7 +594,7 @@ void BindPaddleInferPredictor(py::module *m) {
594 594
.def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
595 595
.def("run", &paddle_infer::Predictor::Run)
596 596
.def("clone", &paddle_infer::Predictor::Clone)
597 -
.def("shrink_memory", &paddle_infer::Predictor::ShrinkMemory)
597 +
.def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
598 598
.def("clear_intermediate_tensor",
599 599
&paddle_infer::Predictor::ClearIntermediateTensor);
600 600
}

0 commit comments

Comments
 (0)