
Commit 42f2bf9

ndimiduk and charlesconnell authored and committed
HBASE-29218: Reduce calls to Configuration#get() in decompression path (apache#6857)
Signed-off-by: Nick Dimiduk <[email protected]>
Co-authored-by: Charles Connell <[email protected]>
1 parent b9e421c commit 42f2bf9

File tree

13 files changed: +269 additions, -79 deletions

hbase-common/pom.xml

Lines changed: 0 additions & 4 deletions
@@ -109,10 +109,6 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-crypto</artifactId>
     </dependency>
-    <dependency>
-      <groupId>com.github.ben-manes.caffeine</groupId>
-      <artifactId>caffeine</artifactId>
-    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressionCodec.java

Lines changed: 4 additions & 0 deletions
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.io.compress;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.yetus.audience.InterfaceAudience;
 
 @InterfaceAudience.Private
@@ -26,4 +27,7 @@ public interface ByteBuffDecompressionCodec {
 
   ByteBuffDecompressor createByteBuffDecompressor();
 
+  Compression.HFileDecompressionContext
+    getDecompressionContextFromConfiguration(Configuration conf);
+
 }
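For orientation, here is a minimal sketch of what an implementing codec could do with the new method. ExampleCodec, ExampleContext, and the "example.codec.dictionary.path" property are illustrative only; they are not part of this commit or of any real HBase codec.

package org.apache.hadoop.hbase.io.compress;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;

// Hypothetical codec, sketched only to show the intended contract of the new interface method.
public class ExampleCodec implements ByteBuffDecompressionCodec {

  // Hypothetical codec-specific context: the expensive Configuration lookups are done once,
  // at HFile-open time, and the result is reused for every block decompression.
  static final class ExampleContext extends Compression.HFileDecompressionContext {
    final String dictionaryPath;

    ExampleContext(String dictionaryPath) {
      this.dictionaryPath = dictionaryPath;
    }

    @Override
    public long heapSize() {
      // Rough estimate; HFileContext#heapSize() adds this value to its own accounting.
      return 32 + (dictionaryPath == null ? 0 : 2L * dictionaryPath.length());
    }

    @Override
    public void close() throws IOException {
      // Nothing to release in this sketch.
    }
  }

  @Override
  public ByteBuffDecompressor createByteBuffDecompressor() {
    throw new UnsupportedOperationException("omitted from this sketch");
  }

  @Override
  public Compression.HFileDecompressionContext
    getDecompressionContextFromConfiguration(Configuration conf) {
    // One Configuration#get() per HFile open instead of one per decompressed block.
    return new ExampleContext(conf.get("example.codec.dictionary.path"));
  }
}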

hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ByteBuffDecompressor.java

Lines changed: 8 additions & 0 deletions
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.io.compress;
 
+import edu.umd.cs.findbugs.annotations.Nullable;
 import java.io.Closeable;
 import java.io.IOException;
 import org.apache.hadoop.hbase.nio.ByteBuff;
@@ -45,4 +46,11 @@ public interface ByteBuffDecompressor extends Closeable {
    */
   boolean canDecompress(ByteBuff output, ByteBuff input);
 
+  /**
+   * Call before every use of {@link #canDecompress(ByteBuff, ByteBuff)} and
+   * {@link #decompress(ByteBuff, ByteBuff, int)} to reinitialize the decompressor with settings
+   * from the HFileInfo. This can matter because ByteBuffDecompressors are reused many times.
+   */
+  void reinit(@Nullable Compression.HFileDecompressionContext newHFileDecompressionContext);
+
 }
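A short sketch of the call pattern the new javadoc describes, roughly mirroring what HFileBlockDefaultDecodingContext does further down in this commit. The helper method and variable names here are illustrative, not new API.

// Illustrative helper, not part of this commit. Assumes the standard imports for
// Compression, ByteBuffDecompressor, HFileContext, ByteBuff and IOException.
static void decompressBlock(Compression.Algorithm compression, HFileContext fileContext,
  ByteBuff blockBufferWithoutHeader, ByteBuff onDiskBlock, int onDiskSizeWithoutHeader)
  throws IOException {
  ByteBuffDecompressor decompressor = compression.getByteBuffDecompressor();
  try {
    // Pooled decompressors are shared across HFiles with different settings, so re-apply the
    // per-file context before every canDecompress()/decompress() call.
    decompressor.reinit(fileContext.getDecompressionContext());
    if (decompressor.canDecompress(blockBufferWithoutHeader, onDiskBlock)) {
      decompressor.decompress(blockBufferWithoutHeader, onDiskBlock, onDiskSizeWithoutHeader);
    }
  } finally {
    compression.returnByteBuffDecompressor(decompressor);
  }
}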

hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/CodecPool.java

Lines changed: 12 additions & 12 deletions
@@ -17,8 +17,6 @@
  */
 package org.apache.hadoop.hbase.io.compress;
 
-import com.github.benmanes.caffeine.cache.Caffeine;
-import com.github.benmanes.caffeine.cache.LoadingCache;
 import edu.umd.cs.findbugs.annotations.Nullable;
 import java.util.Comparator;
 import java.util.NavigableSet;
@@ -37,6 +35,10 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder;
+import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader;
+import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache;
+
 /**
  * A global compressor/decompressor pool used to save and reuse (possibly native)
  * compression/decompression codecs. Copied from the class of the same name in hadoop-common and
@@ -56,7 +58,12 @@ public class CodecPool {
     NavigableSet<ByteBuffDecompressor>> BYTE_BUFF_DECOMPRESSOR_POOL = new ConcurrentHashMap<>();
 
   private static <T> LoadingCache<Class<T>, AtomicInteger> createCache() {
-    return Caffeine.newBuilder().build(key -> new AtomicInteger());
+    return CacheBuilder.newBuilder().build(new CacheLoader<Class<T>, AtomicInteger>() {
+      @Override
+      public AtomicInteger load(Class<T> key) throws Exception {
+        return new AtomicInteger();
+      }
+    });
   }
 
   /**
@@ -108,26 +115,19 @@ private static <T> boolean payback(ConcurrentMap<Class<T>, NavigableSet<T>> pool
   /**
    * Copied from hadoop-common without significant modification.
    */
-  @SuppressWarnings("unchecked")
-  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
-    value = "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE",
-    justification = "LoadingCache will compute value if absent")
   private static <T> int getLeaseCount(LoadingCache<Class<T>, AtomicInteger> usageCounts,
     Class<? extends T> codecClass) {
-    return usageCounts.get((Class<T>) codecClass).get();
+    return usageCounts.getUnchecked((Class<T>) codecClass).get();
   }
 
   /**
    * Copied from hadoop-common without significant modification.
    */
-  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
-    value = "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE",
-    justification = "LoadingCache will compute value if absent")
   private static <T> void updateLeaseCount(LoadingCache<Class<T>, AtomicInteger> usageCounts,
     T codec, int delta) {
     if (codec != null && usageCounts != null) {
       Class<T> codecClass = ReflectionUtils.getClass(codec);
-      usageCounts.get(codecClass).addAndGet(delta);
+      usageCounts.getUnchecked(codecClass).addAndGet(delta);
     }
   }
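The lease counters now use the relocated Guava cache from hbase-thirdparty instead of Caffeine; because the CacheLoader always constructs an AtomicInteger, getUnchecked() can never return null, which is why the SpotBugs suppressions could be dropped. Below is a standalone sketch of the same pattern, written against plain Guava coordinates rather than the shaded org.apache.hbase.thirdparty ones; the class and method names are illustrative.

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.util.concurrent.atomic.AtomicInteger;

// Illustrative sketch of the per-class lease-counter pattern used by CodecPool.
public final class LeaseCounters {

  // Every key gets a counter created on first access; the loader cannot return null,
  // so getUnchecked() always yields a usable AtomicInteger.
  private static final LoadingCache<Class<?>, AtomicInteger> COUNTS =
    CacheBuilder.newBuilder().build(new CacheLoader<Class<?>, AtomicInteger>() {
      @Override
      public AtomicInteger load(Class<?> key) {
        return new AtomicInteger();
      }
    });

  static int leaseCount(Class<?> codecClass) {
    return COUNTS.getUnchecked(codecClass).get();
  }

  static void updateLeaseCount(Class<?> codecClass, int delta) {
    COUNTS.getUnchecked(codecClass).addAndGet(delta);
  }
}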

hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java

Lines changed: 28 additions & 0 deletions
@@ -17,15 +17,18 @@
  */
 package org.apache.hadoop.hbase.io.compress;
 
+import edu.umd.cs.findbugs.annotations.Nullable;
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
+import java.io.Closeable;
 import java.io.FilterOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionInputStream;
@@ -548,11 +551,36 @@ public void returnByteBuffDecompressor(ByteBuffDecompressor decompressor) {
       }
     }
 
+    /**
+     * Get an object that holds settings used by ByteBuffDecompressor. It's expensive to pull these
+     * from a Configuration object every time we decompress a block, so pull them here when, for
+     * example, opening an HFile, and reuse the returned HFileDecompressionContext as much as
+     * possible. The concrete class of this object will be one that is specific to the codec
+     * implementation in use. You don't need to inspect it yourself, just pass it along to
+     * {@link ByteBuffDecompressor#reinit(HFileDecompressionContext)}.
+     */
+    @Nullable
+    public HFileDecompressionContext
+      getHFileDecompressionContextForConfiguration(Configuration conf) {
+      if (supportsByteBuffDecompression()) {
+        return ((ByteBuffDecompressionCodec) getCodec(conf))
+          .getDecompressionContextFromConfiguration(conf);
+      } else {
+        return null;
+      }
+    }
+
     public String getName() {
       return compressName;
     }
   }
 
+  /**
+   * See {@link Algorithm#getHFileDecompressionContextForConfiguration(Configuration)}.
+   */
+  public static abstract class HFileDecompressionContext implements Closeable, HeapSize {
+  }
+
   public static Algorithm getCompressionAlgorithmByName(String compressName) {
     Algorithm[] algos = Algorithm.class.getEnumConstants();
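A hedged sketch of consuming the new Algorithm-level accessor (illustrative names; assumes the usual Configuration and IOException imports). It shows the three properties callers care about: the result may be null, it reports its heap footprint, and it is Closeable.

// Illustrative only: resolve the context once per store file, account for it, and close it
// when the owning reader is closed.
static long decompressionContextHeapSize(Compression.Algorithm algo, Configuration conf)
  throws IOException {
  Compression.HFileDecompressionContext ctx =
    algo.getHFileDecompressionContextForConfiguration(conf);
  if (ctx == null) {
    return 0; // the codec does not support ByteBuff decompression
  }
  try {
    return ctx.heapSize(); // HFileContext#heapSize() performs the same accounting
  } finally {
    ctx.close(); // in real use, keep it open for the lifetime of the reader instead
  }
}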

hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/DictionaryCache.java

Lines changed: 3 additions & 3 deletions
@@ -59,11 +59,11 @@ private DictionaryCache() {
    * @param path the hadoop Path where the dictionary is located, as a String
    * @return the dictionary bytes if successful, null otherwise
    */
-  public static byte[] getDictionary(final Configuration conf, final String path)
-    throws IOException {
+  public static byte[] getDictionary(final Configuration conf, final String path) {
     if (path == null || path.isEmpty()) {
       return null;
     }
+
     // Create the dictionary loading cache if we haven't already
     if (CACHE == null) {
       synchronized (DictionaryCache.class) {
@@ -91,7 +91,7 @@ public byte[] load(String s) throws Exception {
     try {
       return CACHE.get(path);
     } catch (ExecutionException e) {
-      throw new IOException(e);
+      throw new RuntimeException("Unable to load dictionary at " + path, e);
     }
   }
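Since getDictionary() no longer declares IOException (load failures now surface as an unchecked RuntimeException), callers can resolve a dictionary from simple, non-throwing paths. A hedged sketch: the surrounding helper method is hypothetical, while "hbase.io.compress.zstd.dictionary" is the table-descriptor property cited in the HFileContext change below.

// Illustrative helper, not part of this commit; assumes Configuration and DictionaryCache imports.
static byte[] loadDictionaryOnce(Configuration conf) {
  String path = conf.get("hbase.io.compress.zstd.dictionary");
  // Returns null when no dictionary is configured (null/empty path); a failed load now
  // throws RuntimeException("Unable to load dictionary at " + path) instead of IOException.
  return DictionaryCache.getDictionary(conf, path);
}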

hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java

Lines changed: 2 additions & 6 deletions
@@ -139,9 +139,7 @@ private void decompressViaByteBuff(ByteBuff blockBufferWithoutHeader, ByteBuff o
     Compression.Algorithm compression = fileContext.getCompression();
     ByteBuffDecompressor decompressor = compression.getByteBuffDecompressor();
     try {
-      if (decompressor instanceof CanReinit) {
-        ((CanReinit) decompressor).reinit(conf);
-      }
+      decompressor.reinit(fileContext.getDecompressionContext());
       decompressor.decompress(blockBufferWithoutHeader, onDiskBlock, onDiskSizeWithoutHeader);
     } finally {
       compression.returnByteBuffDecompressor(decompressor);
@@ -160,9 +158,7 @@ private boolean canDecompressViaByteBuff(ByteBuff blockBufferWithoutHeader,
     } else {
       ByteBuffDecompressor decompressor = fileContext.getCompression().getByteBuffDecompressor();
       try {
-        if (decompressor instanceof CanReinit) {
-          ((CanReinit) decompressor).reinit(conf);
-        }
+        decompressor.reinit(fileContext.getDecompressionContext());
         // Even if we have a ByteBuffDecompressor, we still need to check if it can decompress
         // our particular ByteBuffs
         return decompressor.canDecompress(blockBufferWithoutHeader, onDiskBlock);

hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java

Lines changed: 33 additions & 4 deletions
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import edu.umd.cs.findbugs.annotations.Nullable;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.InnerStoreCellComparator;
@@ -50,6 +52,11 @@ public class HFileContext implements HeapSize, Cloneable {
   private boolean includesTags;
   /** Compression algorithm used **/
   private Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
+  /**
+   * Details used by compression algorithm that are more efficiently loaded once and then reused
+   **/
+  @Nullable
+  private Compression.HFileDecompressionContext decompressionContext = null;
   /** Whether tags to be compressed or not **/
   private boolean compressTags;
   /** the checksum type **/
@@ -80,6 +87,7 @@ public HFileContext(HFileContext context) {
     this.includesMvcc = context.includesMvcc;
     this.includesTags = context.includesTags;
     this.compressAlgo = context.compressAlgo;
+    this.decompressionContext = context.decompressionContext;
     this.compressTags = context.compressTags;
     this.checksumType = context.checksumType;
     this.bytesPerChecksum = context.bytesPerChecksum;
@@ -95,14 +103,16 @@ public HFileContext(HFileContext context) {
   }
 
   HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags,
-    Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType,
-    int bytesPerChecksum, int blockSize, DataBlockEncoding encoding,
-    Encryption.Context cryptoContext, long fileCreateTime, String hfileName, byte[] columnFamily,
-    byte[] tableName, CellComparator cellComparator, IndexBlockEncoding indexBlockEncoding) {
+    Compression.Algorithm compressAlgo, Compression.HFileDecompressionContext decompressionContext,
+    boolean compressTags, ChecksumType checksumType, int bytesPerChecksum, int blockSize,
+    DataBlockEncoding encoding, Encryption.Context cryptoContext, long fileCreateTime,
+    String hfileName, byte[] columnFamily, byte[] tableName, CellComparator cellComparator,
+    IndexBlockEncoding indexBlockEncoding) {
     this.usesHBaseChecksum = useHBaseChecksum;
     this.includesMvcc = includesMvcc;
     this.includesTags = includesTags;
     this.compressAlgo = compressAlgo;
+    this.decompressionContext = decompressionContext;
     this.compressTags = compressTags;
     this.checksumType = checksumType;
     this.bytesPerChecksum = bytesPerChecksum;
@@ -141,6 +151,20 @@ public Compression.Algorithm getCompression() {
     return compressAlgo;
   }
 
+  /**
+   * Get an object that, if non-null, may be cast into a codec-specific type that exposes some
+   * information from the store-file-specific Configuration that is relevant to decompression. For
+   * example, ZSTD tables can have "hbase.io.compress.zstd.dictionary" on their table descriptor,
+   * and decompressions of blocks in that table must use that dictionary. It's cheaper for HBase to
+   * load these settings into an object of their own once and check this upon each block
+   * decompression, than it is to call into {@link Configuration#get(String)} on each block
+   * decompression.
+   */
+  @Nullable
+  public Compression.HFileDecompressionContext getDecompressionContext() {
+    return decompressionContext;
+  }
+
   public boolean isUseHBaseChecksum() {
     return usesHBaseChecksum;
   }
@@ -238,6 +262,9 @@ public long heapSize() {
     if (this.tableName != null) {
       size += ClassSize.sizeOfByteArray(this.tableName.length);
     }
+    if (this.decompressionContext != null) {
+      size += this.decompressionContext.heapSize();
+    }
     return size;
   }
 
@@ -274,6 +301,8 @@ public String toString() {
     sb.append(compressAlgo);
     sb.append(", compressTags=");
     sb.append(compressTags);
+    sb.append(", decompressionContext=");
+    sb.append(decompressionContext);
     sb.append(", cryptoContext=[");
     sb.append(cryptoContext);
     sb.append("]");

hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java

Lines changed: 14 additions & 2 deletions
@@ -17,8 +17,10 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import edu.umd.cs.findbugs.annotations.Nullable;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.crypto.Encryption;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -42,6 +44,8 @@ public class HFileContextBuilder {
   private boolean includesTags = false;
   /** Compression algorithm used **/
   private Algorithm compression = Algorithm.NONE;
+  @Nullable
+  private Compression.HFileDecompressionContext decompressionContext = null;
   /** Whether tags to be compressed or not **/
   private boolean compressTags = false;
   /** the checksum type **/
@@ -73,6 +77,7 @@ public HFileContextBuilder(final HFileContext hfc) {
     this.includesMvcc = hfc.isIncludesMvcc();
     this.includesTags = hfc.isIncludesTags();
     this.compression = hfc.getCompression();
+    this.decompressionContext = hfc.getDecompressionContext();
     this.compressTags = hfc.isCompressTags();
     this.checkSumType = hfc.getChecksumType();
     this.bytesPerChecksum = hfc.getBytesPerChecksum();
@@ -107,6 +112,12 @@ public HFileContextBuilder withCompression(Algorithm compression) {
     return this;
   }
 
+  public HFileContextBuilder
+    withDecompressionContext(@Nullable Compression.HFileDecompressionContext decompressionContext) {
+    this.decompressionContext = decompressionContext;
+    return this;
+  }
+
   public HFileContextBuilder withCompressTags(boolean compressTags) {
     this.compressTags = compressTags;
     return this;
@@ -169,7 +180,8 @@ public HFileContextBuilder withCellComparator(CellComparator cellComparator) {
 
   public HFileContext build() {
     return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression,
-      compressTags, checkSumType, bytesPerChecksum, blockSize, encoding, cryptoContext,
-      fileCreateTime, hfileName, columnFamily, tableName, cellComparator, indexBlockEncoding);
+      decompressionContext, compressTags, checkSumType, bytesPerChecksum, blockSize, encoding,
+      cryptoContext, fileCreateTime, hfileName, columnFamily, tableName, cellComparator,
+      indexBlockEncoding);
   }
 }
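Putting the pieces together, a hedged sketch of how a reader-open path might populate the new builder field so that every subsequent block decode reuses the precomputed settings; the helper method and variable names are illustrative.

// Illustrative only; assumes imports of HFileContext, HFileContextBuilder, Compression
// and Configuration.
static HFileContext contextForStoreFile(Configuration storeFileConf,
  Compression.Algorithm algo) {
  return new HFileContextBuilder()
    .withCompression(algo)
    // Resolved once here; block decodes later call HFileContext#getDecompressionContext()
    // and ByteBuffDecompressor#reinit() instead of Configuration#get().
    .withDecompressionContext(algo.getHFileDecompressionContextForConfiguration(storeFileConf))
    .build();
}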

0 commit comments
