Skip to content

Commit bfd8501

Browse files
authored
HBASE-27314 Make index block be customized and configured (#4763)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent 175f5af commit bfd8501

18 files changed

Lines changed: 1033 additions & 81 deletions

File tree

hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.hadoop.hbase.exceptions.HBaseException;
2727
import org.apache.hadoop.hbase.io.compress.Compression;
2828
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
29+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2930
import org.apache.hadoop.hbase.regionserver.BloomType;
3031
import org.apache.hadoop.hbase.util.Bytes;
3132
import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;
@@ -338,6 +339,11 @@ public DataBlockEncoding getDataBlockEncoding() {
338339
return delegatee.getDataBlockEncoding();
339340
}
340341

342+
/** Returns the index block encoding reported by {@code delegatee}; this method only forwards. */
@Override
343+
public IndexBlockEncoding getIndexBlockEncoding() {
344+
return delegatee.getIndexBlockEncoding();
345+
}
346+
341347
/**
342348
* Set data block encoding algorithm used in block cache.
343349
* @param value What kind of data block encoding will be used.

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptor.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
2525
import org.apache.hadoop.hbase.io.compress.Compression;
2626
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2728
import org.apache.hadoop.hbase.regionserver.BloomType;
2829
import org.apache.hadoop.hbase.util.Bytes;
2930
import org.apache.yetus.audience.InterfaceAudience;
@@ -107,6 +108,9 @@ public interface ColumnFamilyDescriptor {
107108
/** Returns the data block encoding algorithm used in block cache and optionally on disk */
108109
DataBlockEncoding getDataBlockEncoding();
109110

111+
/** Returns the index block encoding algorithm used in block cache and optionally on disk */
112+
IndexBlockEncoding getIndexBlockEncoding();
113+
110114
/** Returns the raw crypto key attribute for the family, or null if not set */
111115
byte[] getEncryptionKey();
112116

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.hadoop.hbase.exceptions.HBaseException;
3232
import org.apache.hadoop.hbase.io.compress.Compression;
3333
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
34+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
3435
import org.apache.hadoop.hbase.regionserver.BloomType;
3536
import org.apache.hadoop.hbase.util.Bytes;
3637
import org.apache.hadoop.hbase.util.PrettyPrinter;
@@ -84,6 +85,10 @@ public class ColumnFamilyDescriptorBuilder {
8485
public static final String DATA_BLOCK_ENCODING = "DATA_BLOCK_ENCODING";
8586
private static final Bytes DATA_BLOCK_ENCODING_BYTES =
8687
new Bytes(Bytes.toBytes(DATA_BLOCK_ENCODING));
88+
@InterfaceAudience.Private
89+
public static final String INDEX_BLOCK_ENCODING = "INDEX_BLOCK_ENCODING";
90+
private static final Bytes INDEX_BLOCK_ENCODING_BYTES =
91+
new Bytes(Bytes.toBytes(INDEX_BLOCK_ENCODING));
8792
/**
8893
* Key for the BLOCKCACHE attribute. A more exact name would be CACHE_DATA_ON_READ because this
8994
* flag sets whether or not we cache DATA blocks. We always cache INDEX and BLOOM blocks; caching
@@ -199,6 +204,11 @@ public class ColumnFamilyDescriptorBuilder {
199204
*/
200205
public static final DataBlockEncoding DEFAULT_DATA_BLOCK_ENCODING = DataBlockEncoding.NONE;
201206

207+
/**
208+
* Default index block encoding algorithm.
209+
*/
210+
public static final IndexBlockEncoding DEFAULT_INDEX_BLOCK_ENCODING = IndexBlockEncoding.NONE;
211+
202212
/**
203213
* Default number of versions of a record to keep.
204214
*/
@@ -301,6 +311,7 @@ public static Map<String, String> getDefaultValues() {
301311
DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
302312
DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
303313
DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
314+
DEFAULT_VALUES.put(INDEX_BLOCK_ENCODING, String.valueOf(DEFAULT_INDEX_BLOCK_ENCODING));
304315
// Do NOT add this key/value by default. NEW_VERSION_BEHAVIOR is NOT defined in hbase1 so
305316
// it is not possible to make an hbase1 HCD the same as an hbase2 HCD and so the replication
306317
// compare of schemas will fail. It is OK not adding the below to the initial map because of
@@ -501,6 +512,11 @@ public ColumnFamilyDescriptorBuilder setDataBlockEncoding(DataBlockEncoding valu
501512
return this;
502513
}
503514

515+
/**
 * Forwards the given index block encoding to {@code desc}.
 * @param value the index block encoding to pass on; it is not validated here
 * @return this builder, so calls can be chained
 */
public ColumnFamilyDescriptorBuilder setIndexBlockEncoding(IndexBlockEncoding value) {
516+
desc.setIndexBlockEncoding(value);
517+
return this;
518+
}
519+
504520
public ColumnFamilyDescriptorBuilder setEncryptionKey(final byte[] value) {
505521
desc.setEncryptionKey(value);
506522
return this;
@@ -832,6 +848,22 @@ public ModifyableColumnFamilyDescriptor setDataBlockEncoding(DataBlockEncoding t
832848
type == null ? DataBlockEncoding.NONE.name() : type.name());
833849
}
834850

851+
@Override
852+
public IndexBlockEncoding getIndexBlockEncoding() {
853+
return getStringOrDefault(INDEX_BLOCK_ENCODING_BYTES,
854+
n -> IndexBlockEncoding.valueOf(n.toUpperCase()), IndexBlockEncoding.NONE);
855+
}
856+
857+
/**
858+
* Set index block encoding algorithm used in block cache.
859+
* @param type What kind of index block encoding will be used.
860+
* @return this (for chained invocation)
861+
*/
862+
public ModifyableColumnFamilyDescriptor setIndexBlockEncoding(IndexBlockEncoding type) {
863+
return setValue(INDEX_BLOCK_ENCODING_BYTES,
864+
type == null ? IndexBlockEncoding.NONE.name() : type.name());
865+
}
866+
835867
/**
836868
* Set whether the tags should be compressed along with DataBlockEncoding. When no
837869
* DataBlockEncoding is been used, this is having no effect. n * @return this (for chained

hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestColumnFamilyDescriptorBuilder.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.hadoop.hbase.io.compress.Compression;
3232
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
3333
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
34+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
3435
import org.apache.hadoop.hbase.regionserver.BloomType;
3536
import org.apache.hadoop.hbase.testclassification.MiscTests;
3637
import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -224,7 +225,7 @@ public void testSetBlocksize() throws HBaseException {
224225
@Test
225226
public void testDefaultBuilder() {
226227
final Map<String, String> defaultValueMap = ColumnFamilyDescriptorBuilder.getDefaultValues();
227-
assertEquals(defaultValueMap.size(), 11);
228+
assertEquals(defaultValueMap.size(), 12);
228229
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.BLOOMFILTER),
229230
BloomType.ROW.toString());
230231
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.REPLICATION_SCOPE), "0");
@@ -244,6 +245,8 @@ public void testDefaultBuilder() {
244245
KeepDeletedCells.FALSE.toString());
245246
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.DATA_BLOCK_ENCODING),
246247
DataBlockEncoding.NONE.toString());
248+
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.INDEX_BLOCK_ENCODING),
249+
IndexBlockEncoding.NONE.toString());
247250
}
248251

249252
@Test
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.io.encoding;
19+
20+
import java.io.IOException;
21+
import java.io.OutputStream;
22+
import org.apache.hadoop.hbase.util.Bytes;
23+
import org.apache.yetus.audience.InterfaceAudience;
24+
25+
/**
26+
* Provide access to all index block encoding algorithms. All of the algorithms are required to have
27+
* a unique id which should <b>NEVER</b> be changed. If you want to add a new algorithm/version,
28+
* assign it a new id. Announce the new id in the HBase mailing list to prevent collisions.
29+
*/
30+
@InterfaceAudience.Public
31+
public enum IndexBlockEncoding {
32+
33+
/** Disable index block encoding. */
34+
NONE(0, null),
35+
// id 1 is reserved for the PREFIX_TREE algorithm to be added later
36+
PREFIX_TREE(1, null);
37+
38+
private final short id;
39+
private final byte[] idInBytes;
40+
private final String encoderCls;
41+
42+
public static final int ID_SIZE = Bytes.SIZEOF_SHORT;
43+
44+
/** Maps data block encoding ids to enum instances. */
45+
private static IndexBlockEncoding[] idArray = new IndexBlockEncoding[Byte.MAX_VALUE + 1];
46+
47+
static {
48+
for (IndexBlockEncoding algo : values()) {
49+
if (idArray[algo.id] != null) {
50+
throw new RuntimeException(
51+
String.format("Two data block encoder algorithms '%s' and '%s' have " + "the same id %d",
52+
idArray[algo.id].toString(), algo.toString(), (int) algo.id));
53+
}
54+
idArray[algo.id] = algo;
55+
}
56+
}
57+
58+
private IndexBlockEncoding(int id, String encoderClsName) {
59+
if (id < 0 || id > Byte.MAX_VALUE) {
60+
throw new AssertionError("Data block encoding algorithm id is out of range: " + id);
61+
}
62+
this.id = (short) id;
63+
this.idInBytes = Bytes.toBytes(this.id);
64+
if (idInBytes.length != ID_SIZE) {
65+
// White this may seem redundant, if we accidentally serialize
66+
// the id as e.g. an int instead of a short, all encoders will break.
67+
throw new RuntimeException("Unexpected length of encoder ID byte " + "representation: "
68+
+ Bytes.toStringBinary(idInBytes));
69+
}
70+
this.encoderCls = encoderClsName;
71+
}
72+
73+
/** Returns name converted to bytes. */
74+
public byte[] getNameInBytes() {
75+
return Bytes.toBytes(toString());
76+
}
77+
78+
/** Returns The id of a data block encoder. */
79+
public short getId() {
80+
return id;
81+
}
82+
83+
/**
84+
* Writes id in bytes.
85+
* @param stream where the id should be written.
86+
*/
87+
public void writeIdInBytes(OutputStream stream) throws IOException {
88+
stream.write(idInBytes);
89+
}
90+
91+
/**
92+
* Writes id bytes to the given array starting from offset.
93+
* @param dest output array
94+
* @param offset starting offset of the output array n
95+
*/
96+
public void writeIdInBytes(byte[] dest, int offset) throws IOException {
97+
System.arraycopy(idInBytes, 0, dest, offset, ID_SIZE);
98+
}
99+
100+
/**
101+
* Find and return the name of data block encoder for the given id.
102+
* @param encoderId id of data block encoder
103+
* @return name, same as used in options in column family
104+
*/
105+
public static String getNameFromId(short encoderId) {
106+
return getEncodingById(encoderId).toString();
107+
}
108+
109+
public static IndexBlockEncoding getEncodingById(short indexBlockEncodingId) {
110+
IndexBlockEncoding algorithm = null;
111+
if (indexBlockEncodingId >= 0 && indexBlockEncodingId <= Byte.MAX_VALUE) {
112+
algorithm = idArray[indexBlockEncodingId];
113+
}
114+
if (algorithm == null) {
115+
throw new IllegalArgumentException(String
116+
.format("There is no index block encoder for given id '%d'", (int) indexBlockEncodingId));
117+
}
118+
return algorithm;
119+
}
120+
121+
}

hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.hadoop.hbase.io.compress.Compression;
2525
import org.apache.hadoop.hbase.io.crypto.Encryption;
2626
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2728
import org.apache.hadoop.hbase.util.Bytes;
2829
import org.apache.hadoop.hbase.util.ChecksumType;
2930
import org.apache.hadoop.hbase.util.ClassSize;
@@ -58,6 +59,7 @@ public class HFileContext implements HeapSize, Cloneable {
5859
/** Number of uncompressed bytes we allow per block. */
5960
private int blockSize = HConstants.DEFAULT_BLOCKSIZE;
6061
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
62+
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
6163
/** Encryption algorithm and key used */
6264
private Encryption.Context cryptoContext = Encryption.Context.NONE;
6365
private long fileCreateTime;
@@ -89,13 +91,14 @@ public HFileContext(HFileContext context) {
8991
this.columnFamily = context.columnFamily;
9092
this.tableName = context.tableName;
9193
this.cellComparator = context.cellComparator;
94+
this.indexBlockEncoding = context.indexBlockEncoding;
9295
}
9396

9497
HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags,
9598
Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType,
9699
int bytesPerChecksum, int blockSize, DataBlockEncoding encoding,
97100
Encryption.Context cryptoContext, long fileCreateTime, String hfileName, byte[] columnFamily,
98-
byte[] tableName, CellComparator cellComparator) {
101+
byte[] tableName, CellComparator cellComparator, IndexBlockEncoding indexBlockEncoding) {
99102
this.usesHBaseChecksum = useHBaseChecksum;
100103
this.includesMvcc = includesMvcc;
101104
this.includesTags = includesTags;
@@ -107,6 +110,9 @@ public HFileContext(HFileContext context) {
107110
if (encoding != null) {
108111
this.encoding = encoding;
109112
}
113+
if (indexBlockEncoding != null) {
114+
this.indexBlockEncoding = indexBlockEncoding;
115+
}
110116
this.cryptoContext = cryptoContext;
111117
this.fileCreateTime = fileCreateTime;
112118
this.hfileName = hfileName;
@@ -186,6 +192,10 @@ public DataBlockEncoding getDataBlockEncoding() {
186192
return encoding;
187193
}
188194

195+
/** Returns the value of the {@code indexBlockEncoding} field of this context. */
public IndexBlockEncoding getIndexBlockEncoding() {
196+
return indexBlockEncoding;
197+
}
198+
189199
public Encryption.Context getEncryptionContext() {
190200
return cryptoContext;
191201
}
@@ -253,6 +263,8 @@ public String toString() {
253263
sb.append(blockSize);
254264
sb.append(", encoding=");
255265
sb.append(encoding);
266+
sb.append(", indexBlockEncoding=");
267+
sb.append(indexBlockEncoding);
256268
sb.append(", includesMvcc=");
257269
sb.append(includesMvcc);
258270
sb.append(", includesTags=");

hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
2323
import org.apache.hadoop.hbase.io.crypto.Encryption;
2424
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
25+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2526
import org.apache.hadoop.hbase.util.ChecksumType;
2627
import org.apache.yetus.audience.InterfaceAudience;
2728

@@ -50,6 +51,8 @@ public class HFileContextBuilder {
5051
/** Number of uncompressed bytes we allow per block. */
5152
private int blockSize = HConstants.DEFAULT_BLOCKSIZE;
5253
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
54+
/** The index block encoding type. */
55+
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
5356
/** Crypto context */
5457
private Encryption.Context cryptoContext = Encryption.Context.NONE;
5558
private long fileCreateTime = 0;
@@ -128,6 +131,11 @@ public HFileContextBuilder withDataBlockEncoding(DataBlockEncoding encoding) {
128131
return this;
129132
}
130133

134+
/**
 * Records the index block encoding in this builder's {@code indexBlockEncoding} field.
 * @param indexBlockEncoding the index block encoding to record; stored as given, without a
 *                           null check in this method
 * @return this builder, so calls can be chained
 */
public HFileContextBuilder withIndexBlockEncoding(IndexBlockEncoding indexBlockEncoding) {
135+
this.indexBlockEncoding = indexBlockEncoding;
136+
return this;
137+
}
138+
131139
public HFileContextBuilder withEncryptionContext(Encryption.Context cryptoContext) {
132140
this.cryptoContext = cryptoContext;
133141
return this;
@@ -161,6 +169,6 @@ public HFileContextBuilder withCellComparator(CellComparator cellComparator) {
161169
public HFileContext build() {
162170
return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression,
163171
compressTags, checkSumType, bytesPerChecksum, blockSize, encoding, cryptoContext,
164-
fileCreateTime, hfileName, columnFamily, tableName, cellComparator);
172+
fileCreateTime, hfileName, columnFamily, tableName, cellComparator, indexBlockEncoding);
165173
}
166174
}

0 commit comments

Comments
 (0)