Skip to content

Commit 0411e38

Browse files
authored
HBASE-28839: Handle all types of exceptions during retrieval of bucket-cache from persistence. (apache#6250) (apache#6288)
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
1 parent 414f24e commit 0411e38

3 files changed

Lines changed: 74 additions & 87 deletions

File tree

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java

Lines changed: 36 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -378,17 +378,18 @@ private void startPersistenceRetriever(int[] bucketSizes, long capacity) {
378378
try {
379379
retrieveFromFile(bucketSizes);
380380
LOG.info("Persistent bucket cache recovery from {} is complete.", persistencePath);
381-
} catch (IOException ioex) {
382-
LOG.error("Can't restore from file[{}] because of ", persistencePath, ioex);
381+
} catch (Throwable ex) {
382+
LOG.warn("Can't restore from file[{}]. The bucket cache will be reset and rebuilt."
383+
+ " Exception seen: ", persistencePath, ex);
383384
backingMap.clear();
384385
fullyCachedFiles.clear();
385386
backingMapValidated.set(true);
387+
regionCachedSize.clear();
386388
try {
387389
bucketAllocator = new BucketAllocator(capacity, bucketSizes);
388-
} catch (BucketAllocatorException ex) {
389-
LOG.error("Exception during Bucket Allocation", ex);
390+
} catch (BucketAllocatorException allocatorException) {
391+
LOG.error("Exception during Bucket Allocation", allocatorException);
390392
}
391-
regionCachedSize.clear();
392393
} finally {
393394
this.cacheState = CacheState.ENABLED;
394395
startWriterThreads();
@@ -939,7 +940,8 @@ public void logStats() {
939940
: (StringUtils.formatPercent(cacheStats.getHitCachingRatio(), 2) + ", "))
940941
+ "evictions=" + cacheStats.getEvictionCount() + ", " + "evicted="
941942
+ cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
942-
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount());
943+
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + ", blocksCount="
944+
+ backingMap.size());
943945
cacheStats.reset();
944946

945947
bucketAllocator.logDebugStatistics();
@@ -1481,7 +1483,7 @@ private void retrieveFromFile(int[] bucketSizes) throws IOException {
14811483
} else if (Arrays.equals(pbuf, BucketProtoUtils.PB_MAGIC_V2)) {
14821484
// The new persistence format of chunked persistence.
14831485
LOG.info("Reading new chunked format of persistence.");
1484-
retrieveChunkedBackingMap(in, bucketSizes);
1486+
retrieveChunkedBackingMap(in);
14851487
} else {
14861488
// In 3.0 we have enough flexibility to dump the old cache data.
14871489
// TODO: In 2.x line, this might need to be filled in to support reading the old format
@@ -1575,17 +1577,7 @@ private void verifyFileIntegrity(BucketCacheProtos.BucketCacheEntry proto) {
15751577
}
15761578
}
15771579

1578-
private void parseFirstChunk(BucketCacheProtos.BucketCacheEntry firstChunk) throws IOException {
1579-
fullyCachedFiles.clear();
1580-
Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair =
1581-
BucketProtoUtils.fromPB(firstChunk.getDeserializersMap(), firstChunk.getBackingMap(),
1582-
this::createRecycler);
1583-
backingMap.putAll(pair.getFirst());
1584-
blocksByHFile.addAll(pair.getSecond());
1585-
fullyCachedFiles.putAll(BucketProtoUtils.fromPB(firstChunk.getCachedFilesMap()));
1586-
}
1587-
1588-
private void parseChunkPB(BucketCacheProtos.BackingMap chunk,
1580+
private void updateCacheIndex(BucketCacheProtos.BackingMap chunk,
15891581
java.util.Map<java.lang.Integer, java.lang.String> deserializer) throws IOException {
15901582
Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair2 =
15911583
BucketProtoUtils.fromPB(deserializer, chunk, this::createRecycler);
@@ -1611,55 +1603,42 @@ private void parsePB(BucketCacheProtos.BucketCacheEntry proto) throws IOExceptio
16111603
}
16121604

16131605
private void persistChunkedBackingMap(FileOutputStream fos) throws IOException {
1614-
long numChunks = backingMap.size() / persistenceChunkSize;
1615-
if (backingMap.size() % persistenceChunkSize != 0) {
1616-
numChunks += 1;
1617-
}
1618-
16191606
LOG.debug(
16201607
"persistToFile: before persisting backing map size: {}, "
1621-
+ "fullycachedFiles size: {}, chunkSize: {}, numberofChunks: {}",
1622-
backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize, numChunks);
1608+
+ "fullycachedFiles size: {}, chunkSize: {}",
1609+
backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize);
16231610

1624-
BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize, numChunks);
1611+
BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize);
16251612

16261613
LOG.debug(
1627-
"persistToFile: after persisting backing map size: {}, "
1628-
+ "fullycachedFiles size: {}, numChunksPersisteed: {}",
1629-
backingMap.size(), fullyCachedFiles.size(), numChunks);
1614+
"persistToFile: after persisting backing map size: {}, " + "fullycachedFiles size: {}",
1615+
backingMap.size(), fullyCachedFiles.size());
16301616
}
16311617

1632-
private void retrieveChunkedBackingMap(FileInputStream in, int[] bucketSizes) throws IOException {
1633-
byte[] bytes = new byte[Long.BYTES];
1634-
int readSize = in.read(bytes);
1635-
if (readSize != Long.BYTES) {
1636-
throw new IOException("Invalid size of chunk-size read from persistence: " + readSize);
1637-
}
1638-
long batchSize = Bytes.toLong(bytes, 0);
1639-
1640-
readSize = in.read(bytes);
1641-
if (readSize != Long.BYTES) {
1642-
throw new IOException("Invalid size for number of chunks read from persistence: " + readSize);
1643-
}
1644-
long numChunks = Bytes.toLong(bytes, 0);
1645-
1646-
LOG.info("Number of chunks: {}, chunk size: {}", numChunks, batchSize);
1618+
private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
16471619

16481620
// Read the first chunk that has all the details.
1649-
BucketCacheProtos.BucketCacheEntry firstChunk =
1621+
BucketCacheProtos.BucketCacheEntry cacheEntry =
16501622
BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
1651-
parseFirstChunk(firstChunk);
1652-
1653-
// Subsequent chunks have the backingMap entries.
1654-
for (int i = 1; i < numChunks; i++) {
1655-
LOG.info("Reading chunk no: {}", i + 1);
1656-
parseChunkPB(BucketCacheProtos.BackingMap.parseDelimitedFrom(in),
1657-
firstChunk.getDeserializersMap());
1658-
LOG.info("Retrieved chunk: {}", i + 1);
1659-
}
1660-
verifyFileIntegrity(firstChunk);
1661-
verifyCapacityAndClasses(firstChunk.getCacheCapacity(), firstChunk.getIoClass(),
1662-
firstChunk.getMapClass());
1623+
1624+
fullyCachedFiles.clear();
1625+
fullyCachedFiles.putAll(BucketProtoUtils.fromPB(cacheEntry.getCachedFilesMap()));
1626+
1627+
backingMap.clear();
1628+
blocksByHFile.clear();
1629+
1630+
// Read the backing map entries in batches.
1631+
int numChunks = 0;
1632+
while (in.available() > 0) {
1633+
updateCacheIndex(BucketCacheProtos.BackingMap.parseDelimitedFrom(in),
1634+
cacheEntry.getDeserializersMap());
1635+
numChunks++;
1636+
}
1637+
1638+
LOG.info("Retrieved {} of chunks with blockCount = {}.", numChunks, backingMap.size());
1639+
verifyFileIntegrity(cacheEntry);
1640+
verifyCapacityAndClasses(cacheEntry.getCacheCapacity(), cacheEntry.getIoClass(),
1641+
cacheEntry.getMapClass());
16631642
updateRegionSizeMapWhileRetrievingFromFile();
16641643
}
16651644

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import org.apache.hadoop.hbase.io.hfile.BlockType;
3434
import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
3535
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
36-
import org.apache.hadoop.hbase.util.Bytes;
3736
import org.apache.hadoop.hbase.util.Pair;
3837
import org.apache.yetus.audience.InterfaceAudience;
3938

@@ -51,51 +50,53 @@ private BucketProtoUtils() {
5150
}
5251

5352
static BucketCacheProtos.BucketCacheEntry toPB(BucketCache cache,
54-
BucketCacheProtos.BackingMap backingMap) {
53+
BucketCacheProtos.BackingMap.Builder backingMapBuilder) {
5554
return BucketCacheProtos.BucketCacheEntry.newBuilder().setCacheCapacity(cache.getMaxSize())
5655
.setIoClass(cache.ioEngine.getClass().getName())
5756
.setMapClass(cache.backingMap.getClass().getName())
5857
.putAllDeserializers(CacheableDeserializerIdManager.save())
59-
.putAllCachedFiles(toCachedPB(cache.fullyCachedFiles)).setBackingMap(backingMap)
58+
.putAllCachedFiles(toCachedPB(cache.fullyCachedFiles))
59+
.setBackingMap(backingMapBuilder.build())
6060
.setChecksum(ByteString
6161
.copyFrom(((PersistentIOEngine) cache.ioEngine).calculateChecksum(cache.getAlgorithm())))
6262
.build();
6363
}
6464

65-
public static void serializeAsPB(BucketCache cache, FileOutputStream fos, long chunkSize,
66-
long numChunks) throws IOException {
67-
int blockCount = 0;
68-
int chunkCount = 0;
69-
int backingMapSize = cache.backingMap.size();
70-
BucketCacheProtos.BackingMap.Builder builder = BucketCacheProtos.BackingMap.newBuilder();
71-
65+
public static void serializeAsPB(BucketCache cache, FileOutputStream fos, long chunkSize)
66+
throws IOException {
67+
// Write the new version of magic number.
7268
fos.write(PB_MAGIC_V2);
73-
fos.write(Bytes.toBytes(chunkSize));
74-
fos.write(Bytes.toBytes(numChunks));
7569

70+
BucketCacheProtos.BackingMap.Builder builder = BucketCacheProtos.BackingMap.newBuilder();
7671
BucketCacheProtos.BackingMapEntry.Builder entryBuilder =
7772
BucketCacheProtos.BackingMapEntry.newBuilder();
73+
74+
// Persist the metadata first.
75+
toPB(cache, builder).writeDelimitedTo(fos);
76+
77+
int blockCount = 0;
78+
// Persist backing map entries in chunks of size 'chunkSize'.
7879
for (Map.Entry<BlockCacheKey, BucketEntry> entry : cache.backingMap.entrySet()) {
7980
blockCount++;
80-
entryBuilder.clear();
81-
entryBuilder.setKey(BucketProtoUtils.toPB(entry.getKey()));
82-
entryBuilder.setValue(BucketProtoUtils.toPB(entry.getValue()));
83-
builder.addEntry(entryBuilder.build());
84-
85-
if (blockCount % chunkSize == 0 || (blockCount == backingMapSize)) {
86-
chunkCount++;
87-
if (chunkCount == 1) {
88-
// Persist all details along with the first chunk into BucketCacheEntry
89-
BucketProtoUtils.toPB(cache, builder.build()).writeDelimitedTo(fos);
90-
} else {
91-
// Directly persist subsequent backing-map chunks.
92-
builder.build().writeDelimitedTo(fos);
93-
}
94-
if (blockCount < backingMapSize) {
95-
builder.clear();
96-
}
81+
addEntryToBuilder(entry, entryBuilder, builder);
82+
if (blockCount % chunkSize == 0) {
83+
builder.build().writeDelimitedTo(fos);
84+
builder.clear();
9785
}
9886
}
87+
// Persist the last chunk.
88+
if (builder.getEntryList().size() > 0) {
89+
builder.build().writeDelimitedTo(fos);
90+
}
91+
}
92+
93+
private static void addEntryToBuilder(Map.Entry<BlockCacheKey, BucketEntry> entry,
94+
BucketCacheProtos.BackingMapEntry.Builder entryBuilder,
95+
BucketCacheProtos.BackingMap.Builder builder) {
96+
entryBuilder.clear();
97+
entryBuilder.setKey(BucketProtoUtils.toPB(entry.getKey()));
98+
entryBuilder.setValue(BucketProtoUtils.toPB(entry.getValue()));
99+
builder.addEntry(entryBuilder.build());
99100
}
100101

101102
private static BucketCacheProtos.BlockCacheKey toPB(BlockCacheKey key) {

hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,17 @@ public void testSingleChunk() throws Exception {
367367
}
368368

369369
@Test
370-
public void testMultipleChunks() throws Exception {
370+
public void testCompletelyFilledChunks() throws Exception {
371+
// Test where all the chunks are completely filled with chunkSize entries
371372
testChunkedBackingMapRecovery(5, 10);
372373
}
373374

375+
@Test
376+
public void testPartiallyFilledChunks() throws Exception {
377+
// Test where the last chunk is not completely filled.
378+
testChunkedBackingMapRecovery(5, 13);
379+
}
380+
374381
private void testChunkedBackingMapRecovery(int chunkSize, int numBlocks) throws Exception {
375382
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
376383
Path testDir = TEST_UTIL.getDataTestDir();

0 commit comments

Comments
 (0)