HBASE-25698 Fixing IllegalReferenceCountException when using TinyLfuBlockCache #3215
Changes from all commits
@@ -22,6 +22,7 @@
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.BUFFER_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY;
+import static org.apache.hadoop.hbase.io.hfile.BlockCacheFactory.BLOCKCACHE_POLICY_KEY;
 import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -205,10 +206,11 @@ public void testReaderWithLRUBlockCache() throws Exception {
     lru.shutdown();
   }

-  private BlockCache initCombinedBlockCache() {
+  private BlockCache initCombinedBlockCache(final String l1CachePolicy) {
     Configuration that = HBaseConfiguration.create(conf);
     that.setFloat(BUCKET_CACHE_SIZE_KEY, 32); // 32MB for bucket cache.
     that.set(BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    that.set(BLOCKCACHE_POLICY_KEY, l1CachePolicy);
     BlockCache bc = BlockCacheFactory.createBlockCache(that);
     Assert.assertNotNull(bc);
     Assert.assertTrue(bc instanceof CombinedBlockCache);
@@ -225,7 +227,7 @@ public void testReaderWithCombinedBlockCache() throws Exception {
     fillByteBuffAllocator(alloc, bufCount);
     Path storeFilePath = writeStoreFile();
     // Open the file reader with CombinedBlockCache
-    BlockCache combined = initCombinedBlockCache();
+    BlockCache combined = initCombinedBlockCache("LRU");
     conf.setBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, true);
     CacheConfig cacheConfig = new CacheConfig(conf, null, combined, alloc);
     HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConfig, true, conf);
@@ -890,4 +892,72 @@ public void testDBEShipped() throws IOException {
       writer.close();
     }
   }
+
+  /**
+   * Test case for CombinedBlockCache with TinyLfu as L1 cache
+   */
+  @Test
+  public void testReaderWithTinyLfuCombinedBlockCache() throws Exception {
+    testReaderCombinedCache("TinyLfu");
+  }
+
+  /**
+   * Test case for CombinedBlockCache with AdaptiveLRU as L1 cache
+   */
+  @Test
+  public void testReaderWithAdaptiveLruCombinedBlockCache() throws Exception {
+    testReaderCombinedCache("AdaptiveLRU");
+  }
+
+  /**
+   * Test case for CombinedBlockCache with LRU as L1 cache
+   */
+  @Test
+  public void testReaderWithLruCombinedBlockCache() throws Exception {
+    testReaderCombinedCache("LRU");
+  }
+
+  private void testReaderCombinedCache(final String l1CachePolicy) throws Exception {
+    int bufCount = 1024;
+    int blockSize = 64 * 1024;
+    ByteBuffAllocator alloc = initAllocator(true, bufCount, blockSize, 0);
+    fillByteBuffAllocator(alloc, bufCount);
+    Path storeFilePath = writeStoreFile();
+    // Open the file reader with CombinedBlockCache
+    BlockCache combined = initCombinedBlockCache(l1CachePolicy);
+    conf.setBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, true);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, combined, alloc);
+    HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConfig, true, conf);
+    long offset = 0;
+    Cacheable cachedBlock = null;
+    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
+      BlockCacheKey key = new BlockCacheKey(storeFilePath.getName(), offset);
+      HFileBlock block = reader.readBlock(offset, -1, true, true, false, true, null, null);
+      offset += block.getOnDiskSizeWithHeader();
+      // Read the cached block.
+      cachedBlock = combined.getBlock(key, false, false, true);
+      try {
+        Assert.assertNotNull(cachedBlock);
+        Assert.assertTrue(cachedBlock instanceof HFileBlock);
+        HFileBlock hfb = (HFileBlock) cachedBlock;
+        // Data blocks are cached in the BucketCache, so they should be off-heap (shared) blocks.
+        if (hfb.getBlockType().isData()) {
+          Assert.assertTrue(hfb.isSharedMem());
+        } else if (!l1CachePolicy.equals("TinyLfu")) {
+          Assert.assertFalse(hfb.isSharedMem());
+        }
+      } finally {
+        cachedBlock.release();
+      }
+      block.release(); // return the ByteBuffer back to the allocator.
+    }
+    reader.close();
+    combined.shutdown();
+    if (cachedBlock != null) {
+      Assert.assertEquals(0, cachedBlock.refCnt());
+    }
+    Assert.assertEquals(bufCount, alloc.getFreeBufferCount());
+    alloc.clean();
+  }
+
 }

Inline review comment on the isSharedMem() check for non-data blocks:

Contributor: What is this check? Does it mean that in the TinyLFU case there is some difference compared to the other two cache types when it comes to non-data blocks?

Contributor (PR author): Yes, that's what it looks like; I got to know it from this test. Shall we continue having this check, or do you think the different treatment of non-data blocks is an issue?
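For context, the policy string the new helper passes through is the same knob a deployment or another test would use to pick the L1 implementation. A minimal sketch, using only classes already referenced in this diff (BlockCacheFactory, BlockCache, HBaseConfiguration) and "TinyLfu" as an example value:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;

public class L1PolicySketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Same pattern as initCombinedBlockCache(l1CachePolicy) above:
    // "LRU", "TinyLfu" or "AdaptiveLRU" select the L1 block cache policy.
    conf.set(BlockCacheFactory.BLOCKCACHE_POLICY_KEY, "TinyLfu");
    BlockCache cache = BlockCacheFactory.createBlockCache(conf);
    System.out.println("Created " + cache.getClass().getSimpleName());
  }
}
```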
Smile. Related. I've been studying this bit of code here, the flip to computeIfPresent. #get in CHM is lockless, but computeIfPresent takes a lock on the bucket (of keys), so it seems to slow us down, especially under a high read load with most data in cache, e.g. meta hits.
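To make the locking difference concrete, here is a tiny stand-alone illustration; this is generic ConcurrentHashMap code, not HBase's actual getBlock path, and the append() merely stands in for the ref-count bump the real code performs inside computeIfPresent:

```java
import java.util.concurrent.ConcurrentHashMap;

public class ChmReadContrast {
  public static void main(String[] args) {
    ConcurrentHashMap<String, StringBuilder> map = new ConcurrentHashMap<>();
    map.put("hot-block", new StringBuilder("payload"));

    // CHM#get is lock-free: readers never block, no matter how many threads hit the key.
    StringBuilder plainRead = map.get("hot-block");

    // CHM#computeIfPresent locks the hash bin while the remapping function runs, so other
    // updates (including other computeIfPresent calls) on keys in the same bin serialize
    // behind it. The benefit is that the mutation inside cannot race a concurrent removal.
    StringBuilder pinnedRead = map.computeIfPresent("hot-block", (k, v) -> v.append("+pin"));

    System.out.println(plainRead + " / " + pinnedRead);
  }
}
```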
Hmm, yeah, locks would slow us down. On the other hand, based on the discussion on HBASE-22422, it seems computeIfPresent (locking) is necessary to prevent concurrency issues with #retain and #release.

Based on @openinx's comment here, I'm wondering whether the sawtooth graph of QPS is a similar concurrency issue that is not yet resolved.

@saintstack Any suggestions? Have you been using the off-heap read path with LRU recently?
Thanks @virajjasani ... I just read over HBASE-22422. Nice work by @openinx. Agreed we need locking, but currently the lock spans more than the buffer... it covers the CHM bucket. We might be able to scope the lock to the buffer... but I'm not sure, and a little afraid to touch the code here.

On the sawtooth, I've not looked.

Not using offheap. All onheap, but in async-profiler, CHM#computeIfPresent is the main blocking point by far (trying to drive up throughput on in-memory meta lookups).

On this patch generally, +1. I'd noticed, messing w/ this stuff, that tinylfu was missing this bit of code... I'd also noticed that the locking profile with tinylfu in place looked much better... I'd failed to put the two bits of info together. My sense is that once this goes in, tinylfu will be the main blocker... just like CHM is now.

Oddly, for me, tinylfu seemed to run slower in my compares, which I didn't expect given the nice batching it does and its WAL scheme. It's probably variance in my test rig. Working on it...
I suspect that you might be able to switch to an optimistic read, where you validate that the cacheable.retain() was successful. My naive inspection of the code is that it will throw an IllegalReferenceCountException, which could be caught and returned as a cache miss. Because the count is decremented after the entry was removed from the mapping (in evictBlock, though I don't see a release in the removal listener), the stale read should be fine, as a subsequent read/write would not observe that discarded entry.

In YCSB benchmarks, TinyLfuBlockCache is a little bit slower because Caffeine adds a small overhead on reads to have O(1) evictions with a higher hit rate. In comparison, LruBlockCache does no policy work on read and performs an O(n lg n) operation on eviction. Due to the SLRU policy, the Zipfian workload has the same optimal hit rate for both caches, so only the small per-read policy cost is exposed.

In a realistic workload, however, one would expect the TinyLfu policy to have an improved hit rate, which reduces latencies (more requests served from the cache, reducing I/O and CPU load). The O(1) eviction should be helpful for large caches when the system is under high load, as it avoids a spike of work and amortizes the cost.
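For illustration, a minimal self-contained sketch of that optimistic read; this is plain Java with a stand-in ref-counted block, not the actual HBase code, and IllegalStateException here plays the role Netty's IllegalReferenceCountException would play on Cacheable#retain:

```java
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

// Stand-in for a ref-counted cache block: retain() fails once the count has hit zero,
// which is what the real Cacheable signals with IllegalReferenceCountException.
final class Block {
  private final AtomicInteger refCnt = new AtomicInteger(1);

  Block retain() {
    while (true) {
      int current = refCnt.get();
      if (current == 0) {
        throw new IllegalStateException("already fully released");
      }
      if (refCnt.compareAndSet(current, current + 1)) {
        return this;
      }
    }
  }

  void release() {
    refCnt.decrementAndGet();
  }
}

public class OptimisticGetBlockSketch {
  private final ConcurrentMap<String, Block> cache = new ConcurrentHashMap<>();

  // Lock-free hit path: plain get() instead of computeIfPresent(). If eviction won the
  // race and the block is already fully released, retain() throws and we simply report
  // a miss; a later read or write will not observe the discarded entry.
  Block getBlock(String key) {
    Block block = cache.get(key);
    if (block == null) {
      return null;
    }
    try {
      return block.retain();
    } catch (IllegalStateException alreadyReleased) {
      return null; // stale entry: treat as a cache miss
    }
  }

  public static void main(String[] args) {
    OptimisticGetBlockSketch sketch = new OptimisticGetBlockSketch();
    Block b = new Block();
    sketch.cache.put("block-0", b);
    System.out.println("hit: " + (sketch.getBlock("block-0") != null));
    b.release(); // drop the pin taken by getBlock
    b.release(); // drop the cache's own reference, as eviction would
    System.out.println("after full release: " + (sketch.getBlock("block-0") != null));
  }
}
```

The design point is that the hit path goes back to a lock-free CHM#get, and the rare race with eviction degrades to a reported cache miss rather than an exception escaping to the reader.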
Thanks for showing up @ben-manes... Let me look at your suggestion... It's promising.

On tinylfu being 'slower', I'm trying to repro a production workload... What I have is messy at the moment and still in need of work. My test compares were coarse. I didn't want to say too much more until I had solidified the test rig (as noted above).
Thanks @saintstack. This was from my analysis when contributing the original patch.

Zipfian is wonderful for a perf benchmark, stressing locks, etc. to find bottlenecks, but it isn't realistic for actual production performance. I'm not sure there is a great approach other than network record/replay or canarying.

If you have a workload trace we can try to simulate it, where the hit rates should be better (e.g. a database trace). That wouldn't show actual system behavior, just the cache's expected hit rates in isolation. HBase's LRU is similar-ish to SLRU, so ARC might be a good upper bound of expectations.

Between a Zipfian benchmark and trace simulations, we can get a roughish idea of whether there is a benefit. Otherwise canarying is the best that I've seen so far, which is a bit heavy-handed but simple.
I am sure many of the read-request perf regressions reported for HBase 2 (compared to HBase 1) on the dev/user mailing lists might be related to using CHM#computeIfPresent for every on-heap and off-heap cache hit. Yes, refCount makes the code look better, but on the other hand we have perf issues.

I believe we should think about this and see whether we really need the Netty-based refCount, or at least continue using CHM#get and ride over IllegalReferenceCountException by swallowing it and evicting the block (I believe that's what @ben-manes's suggestion is). And the final decision should be applicable to all L1 cache strategies: SLRU, TinyLfu, AdaptiveLRU. Otherwise the BlockCache will have clear perf issues in HBase 2 vs HBase 1.
@saintstack @anoopsjohn @ben-manes How about this one? I have yet to benchmark it and run chaos testing with it, but before I do, I just wanted to see whether you are aligned with this rough patch.

And this perf improvement should then be applied across all L1 caches, something we can take up as a follow-up task.
I would switch to map.remove(cacheKey, cb) so that a race doesn't discard a new mapping. If my naive reading is correct, this map.remove(cacheKey) would already occur after the cb.release(), so it may not be necessary. That could mean a new block was computed, so this remove mistakenly discards the wrong block. You might not need the map removal here at all if you can rely on the release being performed after the map operation completes.

(Referring to hbase/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java, lines 246 to 252 in 947c03c.)
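A tiny stand-alone illustration of why the two-argument remove matters; this is generic map code, not TinyLfuBlockCache itself:

```java
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class ConditionalRemoveSketch {
  public static void main(String[] args) {
    ConcurrentMap<String, Object> cache = new ConcurrentHashMap<>();
    Object staleBlock = new Object();
    cache.put("block-0", staleBlock);

    // Meanwhile another thread re-reads the block and caches a fresh copy under the same key.
    Object freshBlock = new Object();
    cache.put("block-0", freshBlock);

    // An unconditional remove(key) would discard whatever is mapped now -- possibly the
    // fresh block. The two-argument form removes only if the mapping is still the stale one.
    boolean removedStale = cache.remove("block-0", staleBlock);
    System.out.println("stale removed: " + removedStale);                       // false
    System.out.println("fresh still cached: " + cache.containsKey("block-0"));  // true
  }
}
```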
Sounds good; map.remove(cacheKey, cb) too should not be required in this case. Thanks @ben-manes.