diff --git a/sycl/plugins/level_zero/usm_allocator.cpp b/sycl/plugins/level_zero/usm_allocator.cpp
index 9cd95ef25db0d..b22cfdb18c99f 100644
--- a/sycl/plugins/level_zero/usm_allocator.cpp
+++ b/sycl/plugins/level_zero/usm_allocator.cpp
@@ -414,28 +414,50 @@ class Bucket {
   // routines, slab map and etc.
   USMAllocContext::USMAllocImpl &OwnAllocCtx;
 
+  // For buckets used in chunked mode, a counter of slabs in the pool.
+  // For allocations that use an entire slab each, the entries in the Available
+  // list are entries in the pool. Each slab is available for a new
+  // allocation. The size of the Available list is the size of the pool.
+  // For allocations that use slabs in chunked mode, slabs will be in the
+  // Available list if any one or more of their chunks is free. The entire slab
+  // is not necessarily free, just some chunks in the slab are free. To
+  // implement pooling we will allow one slab in the Available list to be
+  // entirely empty. Normally such a slab would have been freed from USM. But
+  // now we don't, and treat this slab as "in the pool".
+  // When a slab becomes entirely free we have to decide whether to return it to
+  // USM or keep it allocated. A simple check for size of the Available list is
+  // not sufficient to check whether any slab has been pooled yet. We would have
+  // to traverse the entire Available list and check if any of them is entirely
+  // free. Instead we keep a counter of entirely empty slabs within the
+  // Available list to speed up the process of checking if a slab in this bucket
+  // is already pooled.
+  size_t chunkedSlabsInPool;
+
   // Statistics
-  size_t allocCount;
   size_t allocPoolCount;
   size_t freeCount;
   size_t currSlabsInUse;
   size_t currSlabsInPool;
-  size_t maxSlabsInUse;
   size_t maxSlabsInPool;
 
 public:
+  // Statistics
+  size_t allocCount;
+  size_t maxSlabsInUse;
+
   Bucket(size_t Sz, USMAllocContext::USMAllocImpl &AllocCtx)
-      : Size{Sz}, OwnAllocCtx{AllocCtx}, allocCount(0), allocPoolCount(0),
-        freeCount(0), currSlabsInUse(0), currSlabsInPool(0), maxSlabsInUse(0),
-        maxSlabsInPool(0) {}
+      : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0),
+        allocPoolCount(0), freeCount(0), currSlabsInUse(0), currSlabsInPool(0),
+        maxSlabsInPool(0), allocCount(0), maxSlabsInUse(0) {}
 
   // Get pointer to allocation that is one piece of an available slab in this
   // bucket.
-  void *getChunk(bool &FromAllocatedSlab);
+  void *getChunk(bool &FromPool);
 
   // Get pointer to allocation that is a full slab in this bucket.
   void *getSlab(bool &FromPool);
 
+  // Return the allocation size of this bucket.
   size_t getSize() const { return Size; }
 
   // Free an allocation that is one piece of a slab in this bucket.
@@ -451,11 +473,14 @@ class Bucket {
   USMAllocContext::USMAllocImpl &getUsmAllocCtx() { return OwnAllocCtx; }
 
   // Check whether an allocation to be freed can be placed in the pool.
-  bool CanPool();
+  bool CanPool(bool &ToPool);
 
-  // The minimum allocation size for a slab in this bucket.
+  // The minimum allocation size for any slab.
   size_t SlabMinSize();
 
+  // The allocation size for a slab in this bucket.
+  size_t SlabAllocSize();
+
   // The minimum size of a chunk from this bucket's slabs.
   size_t ChunkCutOff();
 
@@ -475,17 +500,19 @@ class Bucket {
   void updateStats(int InUse, int InPool);
 
   // Print bucket statistics
-  void printStats();
+  void printStats(bool &TitlePrinted, SystemMemory::MemType MT);
 
 private:
   void onFreeChunk(Slab &, bool &ToPool);
 
+  // Update statistics of pool usage, and indicate that an allocation was made
+  // from the pool.
+  void decrementPool(bool &FromPool);
+
   // Get a slab to be used for chunked allocations.
-  // These slabs are used for allocations <= ChunkCutOff and not pooled.
-  decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromAllocatedSlab);
+  decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool);
 
   // Get a slab that will be used as a whole for a single allocation.
-  // These slabs are > ChunkCutOff in size and pooled.
   decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool);
 };
 
@@ -531,7 +558,8 @@ class USMAllocContext::USMAllocImpl {
     return USMSettings.SlabMinSize[(*MemHandle).getMemType()];
   };
 
-  void printStats();
+  void printStats(bool &TitlePrinted, size_t &HighBucketSize,
+                  size_t &HighPeakSlabsInUse, SystemMemory::MemType MT);
 
 private:
   Bucket &findBucket(size_t Size);
@@ -552,10 +580,8 @@ Slab::Slab(Bucket &Bkt)
       // some padding at the end of the slab.
      Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0},
      bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} {
-  size_t SlabAllocSize = Bkt.getSize();
-  if (SlabAllocSize < Bkt.SlabMinSize())
-    SlabAllocSize = Bkt.SlabMinSize();
-  MemPtr = Bkt.getMemHandle().allocate(SlabAllocSize);
+  auto SlabSize = Bkt.SlabAllocSize();
+  MemPtr = Bkt.getMemHandle().allocate(SlabSize);
   regSlab(*this);
 }
 
@@ -673,6 +699,14 @@ void *Slab::getEnd() const {
 
 bool Slab::hasAvail() { return NumAllocated != getNumChunks(); }
 
+// If a slab was available in the pool then note that the current pooled
+// size has reduced by the size of a slab in this bucket.
+void Bucket::decrementPool(bool &FromPool) {
+  FromPool = true;
+  updateStats(1, -1);
+  USMSettings.CurPoolSize -= SlabAllocSize();
+}
+
 auto Bucket::getAvailFullSlab(bool &FromPool)
     -> decltype(AvailableSlabs.begin()) {
   // Return a slab that will be used for a single allocation.
@@ -681,17 +715,9 @@ auto Bucket::getAvailFullSlab(bool &FromPool)
                                    std::make_unique<Slab>(*this));
     (*It)->setIterator(It);
     FromPool = false;
-    if (USMSettings.PoolTrace > 1)
-      updateStats(1, 0);
+    updateStats(1, 0);
   } else {
-    // If a slab was available in the pool then note that the current pooled
-    // size has reduced by the size of this slab.
-    FromPool = true;
-    if (USMSettings.PoolTrace > 1) {
-      updateStats(1, -1);
-      USMSettings.CurPoolSizes[getMemType()] -= Size;
-    }
-    USMSettings.CurPoolSize -= Size;
+    decrementPool(FromPool);
   }
 
   return AvailableSlabs.begin();
@@ -713,47 +739,44 @@ void Bucket::freeSlab(Slab &Slab, bool &ToPool) {
   std::lock_guard Lg(BucketLock);
   auto SlabIter = Slab.getIterator();
   assert(SlabIter != UnavailableSlabs.end());
-  if (CanPool()) {
+  if (CanPool(ToPool)) {
     auto It =
         AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter));
     UnavailableSlabs.erase(SlabIter);
     (*It)->setIterator(It);
-
-    if (USMSettings.PoolTrace > 1) {
-      updateStats(-1, 1);
-      ToPool = true;
-    }
   } else {
     UnavailableSlabs.erase(SlabIter);
-
-    if (USMSettings.PoolTrace > 1) {
-      updateStats(-1, 0);
-      ToPool = false;
-    }
   }
 }
 
-auto Bucket::getAvailSlab(bool &FromAllocatedSlab)
-    -> decltype(AvailableSlabs.begin()) {
+auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) {
 
-  FromAllocatedSlab = true;
   if (AvailableSlabs.size() == 0) {
     auto It = AvailableSlabs.insert(AvailableSlabs.begin(),
                                     std::make_unique<Slab>(*this));
     (*It)->setIterator(It);
 
-    if (USMSettings.PoolTrace > 1)
-      updateStats(1, 0);
-    FromAllocatedSlab = false;
+    updateStats(1, 0);
+    FromPool = false;
+  } else {
+    if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) {
+      // If this was an empty slab, it was in the pool.
+      // Now it is no longer in the pool, so update count.
+      --chunkedSlabsInPool;
+      decrementPool(FromPool);
+    } else {
+      // Allocation from existing slab is treated as from pool for statistics.
+      FromPool = true;
+    }
   }
 
   return AvailableSlabs.begin();
 }
 
-void *Bucket::getChunk(bool &FromAllocatedSlab) {
+void *Bucket::getChunk(bool &FromPool) {
   std::lock_guard Lg(BucketLock);
 
-  auto SlabIt = getAvailSlab(FromAllocatedSlab);
+  auto SlabIt = getAvailSlab(FromPool);
   auto *FreeChunk = (*SlabIt)->getChunk();
 
   // If the slab is full, move it to unavailable slabs and update its iterator
@@ -792,33 +815,45 @@ void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) {
     (*It)->setIterator(It);
   }
 
-  // Remove the slab when all the chunks from it are deallocated
-  // Note: since the slab is stored as unique_ptr, just remove it from
-  // the list to remove the list to destroy the object
+  // Check if slab is empty, and pool it if we can.
   if (Slab.getNumAllocated() == 0) {
-    auto It = Slab.getIterator();
-    assert(It != AvailableSlabs.end());
-
-    AvailableSlabs.erase(It);
-
-    if (USMSettings.PoolTrace > 1)
-      updateStats(-1, 0);
-
-    ToPool = false;
+    // The slab is now empty.
+    // If pool has capacity then put the slab in the pool.
+    // The ToPool parameter indicates whether the Slab will be put in the pool
+    // or freed from USM.
+    if (!CanPool(ToPool)) {
+      // Note: since the slab is stored as unique_ptr, just remove it from
+      // the list to destroy the object.
+      auto It = Slab.getIterator();
+      assert(It != AvailableSlabs.end());
+      AvailableSlabs.erase(It);
+    }
   }
 }
 
-bool Bucket::CanPool() {
+bool Bucket::CanPool(bool &ToPool) {
   std::lock_guard Lock{PoolLock};
-  size_t NewFreeSlabsInBucket = AvailableSlabs.size() + 1;
+  size_t NewFreeSlabsInBucket;
+  // Check if this bucket is used in chunked form or as full slabs.
+  bool chunkedBucket = getSize() <= ChunkCutOff();
+  if (chunkedBucket)
+    NewFreeSlabsInBucket = chunkedSlabsInPool + 1;
+  else
+    NewFreeSlabsInBucket = AvailableSlabs.size() + 1;
   if (Capacity() >= NewFreeSlabsInBucket) {
-    size_t NewPoolSize = USMSettings.CurPoolSize + Size;
+    size_t NewPoolSize = USMSettings.CurPoolSize + SlabAllocSize();
     if (USMSettings.MaxPoolSize >= NewPoolSize) {
       USMSettings.CurPoolSize = NewPoolSize;
-      USMSettings.CurPoolSizes[getMemType()] += Size;
+      if (chunkedBucket)
+        ++chunkedSlabsInPool;
+
+      updateStats(-1, 1);
+      ToPool = true;
       return true;
     }
   }
+  updateStats(-1, 0);
+  ToPool = false;
   return false;
 }
 
@@ -830,7 +865,16 @@ SystemMemory::MemType Bucket::getMemType() {
 
 size_t Bucket::SlabMinSize() { return USMSettings.SlabMinSize[getMemType()]; }
 
-size_t Bucket::Capacity() { return USMSettings.Capacity[getMemType()]; }
+size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); }
+
+size_t Bucket::Capacity() {
+  // For buckets used in chunked mode, just one slab in pool is sufficient.
+  // For larger buckets, the capacity could be more and is adjustable.
+  if (getSize() <= ChunkCutOff())
+    return 1;
+  else
+    return USMSettings.Capacity[getMemType()];
+}
 
 size_t Bucket::MaxPoolableSize() {
   return USMSettings.MaxPoolableSize[getMemType()];
@@ -847,14 +891,28 @@ void Bucket::countAlloc(bool FromPool) {
 void Bucket::countFree() { ++freeCount; }
 
 void Bucket::updateStats(int InUse, int InPool) {
+  if (USMSettings.PoolTrace == 0)
+    return;
   currSlabsInUse += InUse;
   maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse);
   currSlabsInPool += InPool;
   maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool);
+  // Increment or decrement current pool sizes based on whether
+  // slab was added to or removed from pool.
+  USMSettings.CurPoolSizes[getMemType()] += InPool * SlabAllocSize();
 }
 
-void Bucket::printStats() {
+void Bucket::printStats(bool &TitlePrinted, SystemMemory::MemType MT) {
   if (allocCount) {
+    if (!TitlePrinted) {
+      auto Label = MemTypeNames[MT];
+      std::cout << Label << " memory statistics\n";
+      std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs"
+                << std::setw(12) << "Frees" << std::setw(18)
+                << "Allocs from Pool" << std::setw(20) << "Peak Slabs in Use"
+                << std::setw(21) << "Peak Slabs in Pool" << std::endl;
+      TitlePrinted = true;
+    }
     std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount
               << std::setw(12) << freeCount << std::setw(18) << allocPoolCount
               << std::setw(20) << maxSlabsInUse << std::setw(21)
@@ -982,6 +1040,7 @@ USMAllocContext::USMAllocContext(std::unique_ptr<SystemMemory> MemHandle)
     : pImpl(std::make_unique<USMAllocImpl>(std::move(MemHandle))) {}
 
 void *USMAllocContext::allocate(size_t size) {
+  // For full-slab allocations, indicates whether the slab is from the pool.
   bool FromPool;
   auto Ptr = pImpl->allocate(size, FromPool);
 
@@ -1023,27 +1082,36 @@ void USMAllocContext::deallocate(void *ptr, bool OwnZeMemHandle) {
   return;
 }
 
-// Define destructor for its usage with unique_ptr
+// Define destructor for use with unique_ptr
 USMAllocContext::~USMAllocContext() {
+  bool TitlePrinted = false;
+  size_t HighBucketSize;
+  size_t HighPeakSlabsInUse;
   if (USMSettings.PoolTrace > 1) {
-    auto Label = "Shared";
-    if (pImpl->getMemHandle().getMemType() == SystemMemory::Host)
-      Label = "Host";
-    if (pImpl->getMemHandle().getMemType() == SystemMemory::Device)
-      Label = "Device";
-    std::cout << Label << " memory statistics\n";
-    pImpl->printStats();
-    std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl;
+    SystemMemory::MemType MT = pImpl->getMemHandle().getMemType();
+    pImpl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, MT);
+    if (TitlePrinted) {
+      std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl;
+      const char *Label = MemTypeNames[MT];
+      std::cout << "Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;"
+                << std::string(1, tolower(*Label)) << std::string(Label + 1)
+                << ":" << HighBucketSize << "," << HighPeakSlabsInUse << ",64K"
+                << std::endl;
+    }
   }
 }
 
-void USMAllocContext::USMAllocImpl::printStats() {
-  std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs"
-            << std::setw(12) << "Frees" << std::setw(18) << "Allocs From Pool"
-            << std::setw(20) << "Peak Slabs In Use" << std::setw(21)
-            << "Peak Slabs in Pool" << std::endl;
+void USMAllocContext::USMAllocImpl::printStats(bool &TitlePrinted,
+                                               size_t &HighBucketSize,
+                                               size_t &HighPeakSlabsInUse,
+                                               SystemMemory::MemType MT) {
+  HighBucketSize = 0;
+  HighPeakSlabsInUse = 0;
   for (auto &B : Buckets) {
-    (*B).printStats();
+    (*B).printStats(TitlePrinted, MT);
+    HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse);
+    if ((*B).allocCount)
+      HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize);
   }
 }
 
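Note (not part of the patch): the pooling decision introduced above can be summarized as follows. A bucket whose allocation size is at or below ChunkCutOff() serves chunked allocations and keeps at most one fully empty slab in the pool, while larger buckets may keep up to the configurable per-memory-type capacity; in both cases the global MaxPoolSize limit must not be exceeded before a freed slab is retained. The standalone C++ sketch below models that logic under those assumptions; the names Settings and BucketModel are hypothetical stand-ins for USMSettings and Bucket, and the constant values are illustrative only.

#include <algorithm>
#include <cstddef>

struct Settings {
  std::size_t MaxPoolSize = 16 * 1024 * 1024; // global pool limit (example value)
  std::size_t CurPoolSize = 0;                // bytes currently retained in the pool
  std::size_t Capacity = 4;                   // per-bucket slab capacity for non-chunked buckets
};

struct BucketModel {
  std::size_t Size;              // allocation size served by this bucket
  std::size_t SlabMinSize;       // minimum size of a slab
  std::size_t ChunkCutOff;       // sizes <= this are carved out of shared slabs
  std::size_t FreeSlabsInBucket; // empty slabs already pooled (chunkedSlabsInPool
                                 // or AvailableSlabs.size() in the real code)

  std::size_t SlabAllocSize() const { return std::max(Size, SlabMinSize); }

  // Chunked buckets keep at most one fully empty slab; others use Capacity.
  std::size_t PoolCapacity(const Settings &S) const {
    return Size <= ChunkCutOff ? 1 : S.Capacity;
  }

  // Returns true (and charges the pool) if a newly freed slab may be retained.
  bool CanPool(Settings &S) const {
    if (PoolCapacity(S) < FreeSlabsInBucket + 1)
      return false;
    std::size_t NewPoolSize = S.CurPoolSize + SlabAllocSize();
    if (S.MaxPoolSize < NewPoolSize)
      return false;
    S.CurPoolSize = NewPoolSize;
    return true;
  }
};

The ordering mirrors the patched Bucket::CanPool(): the per-bucket slab count is checked first, then the global size limit, and only when both pass is CurPoolSize charged with the slab's allocation size.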