@@ -44,20 +44,40 @@ PHI_DEFINE_EXPORTED_READONLY_bool(print_allocator_trace_info,
4444 " print trace memory info" );
4545
4646PHI_DEFINE_EXPORTED_READONLY_bool (dump_chunk_info, false , " dump chunk info" );
47- PHI_DEFINE_EXPORTED_uint64 (alignment_size, 256 , " alignment_size" );
48- PHI_DEFINE_EXPORTED_uint64 (small_pool_size_in_mb, 1 , " small_pool_size_in_mb" );
49- PHI_DEFINE_EXPORTED_uint64 (small_pool_auto_growth_chunk_size_in_mb,
50- 0 ,
51- " small_pool_auto_growth_chunk_size_in_mb" );
52- PHI_DEFINE_EXPORTED_uint64 (large_pool_auto_growth_chunk_size_in_mb,
53- 0 ,
54- " large_pool_auto_growth_chunk_size_in_mb" );
55- PHI_DEFINE_EXPORTED_uint64 (large_pool_pre_alloc_in_mb,
56- 0 ,
57- " large_pool_pre_alloc_in_mb" );
58- PHI_DEFINE_EXPORTED_uint64 (small_pool_pre_alloc_in_mb,
59- 0 ,
60- " small_pool_pre_alloc_in_mb" );
47+ PHI_DEFINE_EXPORTED_uint64 (
48+ alignment_size,
49+ 256 ,
50+ " All sizes are rounded up to a multiple of this value. Default: 256." );
51+ PHI_DEFINE_EXPORTED_uint64 (
52+ small_pool_size_in_mb,
53+ 0 ,
54+ " Threshold (MiB) separating the small and large pools. "
55+ " 0 disables the small pool and enables single-pool mode "
56+ " (all requests go to the large pool). When > 0, requests "
57+ " <= threshold use the small pool; larger requests use the "
58+ " large pool. Default: 0." );
59+ PHI_DEFINE_EXPORTED_uint64 (
60+ small_pool_auto_growth_chunk_size_in_mb,
61+ 0 ,
62+ " The minimal chunk size for the small pool in MiB. If > 0, this overrides "
63+ " the constructor-provided global growth size "
64+ " (FLAGS_auto_growth_chunk_size_in_mb) "
65+ " If 0, falls back to the global growth size." );
66+ PHI_DEFINE_EXPORTED_uint64 (
67+ large_pool_auto_growth_chunk_size_in_mb,
68+ 0 ,
69+ " The minimal chunk size for the large pool in MiB. If > 0, this overrides "
70+ " the constructor-provided global growth size "
71+ " (FLAGS_auto_growth_chunk_size_in_mb) "
72+ " If 0, falls back to the global growth size." );
73+ PHI_DEFINE_EXPORTED_uint64 (
74+ large_pool_pre_alloc_in_mb,
75+ 0 ,
76+ " Pre-reserve this many MiB in the large pool. 0 disables pre-allocation." );
77+ PHI_DEFINE_EXPORTED_uint64 (
78+ small_pool_pre_alloc_in_mb,
79+ 0 ,
80+ " Pre-reserve this many MiB in the small pool. 0 disables pre-allocation." );
6181
6282namespace paddle ::memory::allocation {
6383
@@ -112,28 +132,34 @@ bool AutoGrowthBestFitAllocator::is_small_free_block(size_t size) {
112132
113133size_t AutoGrowthBestFitAllocator::auto_growth_size (bool is_small,
114134 size_t chunk_size) {
115- size_t auto_growth_chunk_size = 0 ;
116- if (chunk_size > 0 ) {
117- auto_growth_chunk_size = chunk_size;
118- }
119-
120- if (is_small) {
121- auto_growth_chunk_size = FLAGS_small_pool_auto_growth_chunk_size_in_mb
122- << 20 ;
135+ // Priority: pool-specific flag (>0) > constructor-provided chunk_size (>0) >
136+ // member chunk_size_. Return value is aligned to alignment_ and at least
137+ // alignment_.
138+ const uint64_t pool_auto_growth_chunk_size_mb =
139+ is_small ? FLAGS_small_pool_auto_growth_chunk_size_in_mb
140+ : FLAGS_large_pool_auto_growth_chunk_size_in_mb;
141+ const size_t pool_auto_growth_chunk_size_bytes =
142+ pool_auto_growth_chunk_size_mb
143+ ? (static_cast <size_t >(pool_auto_growth_chunk_size_mb) << 20 )
144+ : 0 ;
145+
146+ size_t auto_growth_size = 0 ;
147+ if (pool_auto_growth_chunk_size_bytes) {
148+ auto_growth_size = pool_auto_growth_chunk_size_bytes; // 1) pool-specific
149+ // flag (MB -> bytes)
150+ } else if (chunk_size > 0 ) {
151+ auto_growth_size = chunk_size; // 2) value provided at construction (bytes)
123152 } else {
124- auto_growth_chunk_size = FLAGS_large_pool_auto_growth_chunk_size_in_mb
125- << 20 ;
153+ auto_growth_size =
154+ chunk_size_; // 3) member fallback (already aligned in constructor)
126155 }
127156
128- if (FLAGS_dump_chunk_info) {
129- std::cout << " is_small = " << is_small
130- << " auto_growth_size = " << auto_growth_chunk_size << std::endl;
131- }
132- return auto_growth_chunk_size;
157+ auto_growth_size = AlignedSize (auto_growth_size, alignment_);
158+
159+ return auto_growth_size;
133160}
134161
135162void AutoGrowthBestFitAllocator::PreAlloc () {
136- VLOG (10 ) << " AutoGrowthBestFitAllocator start PreAlloc " ;
137163 auto small_pool_pre_alloc = FLAGS_small_pool_pre_alloc_in_mb << 20 ;
138164 auto large_pool_pre_alloc = FLAGS_large_pool_pre_alloc_in_mb << 20 ;
139165 if (small_pool_pre_alloc > 0 ) {
@@ -144,7 +170,8 @@ void AutoGrowthBestFitAllocator::PreAlloc() {
144170 auto *chunk = &(*chunks_.rbegin ());
145171 uint8_t *p = reinterpret_cast <uint8_t *>(chunk->allocation_ ->ptr ());
146172 auto &blocks = chunk->blocks_ ;
147- blocks.emplace_back (p, small_pool_pre_alloc, true , true , chunk);
173+ blocks.emplace_back (
174+ p, small_pool_pre_alloc, /* is_free=*/ true , /* is_small=*/ true , chunk);
148175 small_free_blocks_.emplace (std::make_pair (small_pool_pre_alloc, p),
149176 --(blocks.end ()));
150177 }
@@ -157,7 +184,8 @@ void AutoGrowthBestFitAllocator::PreAlloc() {
157184 auto *chunk = &(*chunks_.rbegin ());
158185 uint8_t *p = reinterpret_cast <uint8_t *>(chunk->allocation_ ->ptr ());
159186 auto &blocks = chunk->blocks_ ;
160- blocks.emplace_back (p, large_pool_pre_alloc, true , true , chunk);
187+ blocks.emplace_back (
188+ p, large_pool_pre_alloc, /* is_free=*/ true , /* is_small=*/ false , chunk);
161189 large_free_blocks_.emplace (std::make_pair (large_pool_pre_alloc, p),
162190 --(blocks.end ()));
163191 }
@@ -252,10 +280,6 @@ phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
252280 total_alloc_size_ += size;
253281 VLOG (10 ) << " Alloc " << block_it->size_ << " bytes, ptr = " << block_it->ptr_ ;
254282 auto block_t = new BlockAllocation (block_it);
255- if (FLAGS_dump_chunk_info) {
256- DumpInfo ();
257- }
258- Trace ();
259283 return block_t ;
260284}
261285
0 commit comments