-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-28621 PrefixFilter should use SEEK_NEXT_USING_HINT #6361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
42ca0cf
45d5cbf
3280f9f
75fd8fa
870e9b6
a9db7da
0da2505
2792fb2
31f34b4
5779bd4
3225197
039c896
ce46c31
1aa84f5
ba21710
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,8 +18,10 @@ | |
| package org.apache.hadoop.hbase.filter; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.Arrays; | ||
| import org.apache.hadoop.hbase.ByteBufferExtendedCell; | ||
| import org.apache.hadoop.hbase.Cell; | ||
| import org.apache.hadoop.hbase.PrivateCellUtil; | ||
| import org.apache.hadoop.hbase.exceptions.DeserializationException; | ||
| import org.apache.hadoop.hbase.util.ByteBufferUtils; | ||
| import org.apache.hadoop.hbase.util.Bytes; | ||
|
|
@@ -35,13 +37,24 @@ | |
| * Pass results that have same row prefix. | ||
| */ | ||
| @InterfaceAudience.Public | ||
| public class PrefixFilter extends FilterBase { | ||
| public class PrefixFilter extends FilterBase implements HintingFilter { | ||
| protected byte[] prefix = null; | ||
| protected boolean passedPrefix = false; | ||
| protected boolean filterRow = true; | ||
| protected boolean provideHint = false; | ||
| protected Cell reversedNextCellHint; | ||
|
|
||
/**
 * Creates a filter that passes rows whose key starts with the given prefix.
 * <p>
 * The seek hint for reversed scans is derived from the prefix once, here, instead of on every
 * hint request. A {@code null} prefix is accepted, as it is by {@link #filterRowKey(Cell)} and
 * {@link #toByteArray()}; in that case no reversed hint is built and the field stays
 * {@code null}.
 * @param prefix the row-key prefix to match; the array is stored as-is, not copied
 */
public PrefixFilter(final byte[] prefix) {
  this.prefix = prefix;
  // Pre-compute reverse hint at creation to avoid re-computing it several times in the corner
  // case where there are a lot of cells between the hint and the first real match.
  // Deriving the hint dereferences the prefix, so skip it when there is no prefix rather than
  // failing construction with a NullPointerException.
  if (prefix != null) {
    this.reversedNextCellHint = createReversedNextCellHint();
  }
}
|
|
||
| private Cell createReversedNextCellHint() { | ||
| // On reversed scan hint should be the prefix with last byte incremented | ||
| byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix); | ||
| return PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length); | ||
| } | ||
|
|
||
| public byte[] getPrefix() { | ||
|
|
@@ -50,10 +63,12 @@ public byte[] getPrefix() { | |
|
|
||
| @Override | ||
| public boolean filterRowKey(Cell firstRowCell) { | ||
| if (firstRowCell == null || this.prefix == null) return true; | ||
| if (filterAllRemaining()) return true; | ||
| int length = firstRowCell.getRowLength(); | ||
| if (length < prefix.length) return true; | ||
| if (firstRowCell == null || this.prefix == null) { | ||
| return true; | ||
| } | ||
| if (filterAllRemaining()) { | ||
| return true; | ||
| } | ||
| // if they are equal, return false => pass row | ||
| // else return true, filter row | ||
| // if we are past the prefix, set flag | ||
PDavid marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
@@ -70,12 +85,18 @@ public boolean filterRowKey(Cell firstRowCell) { | |
| passedPrefix = true; | ||
| } | ||
| filterRow = (cmp != 0); | ||
| return filterRow; | ||
| provideHint = (!isReversed() && cmp < 0) || (isReversed() && cmp > 0); | ||
PDavid marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return passedPrefix; | ||
| } | ||
|
|
||
| @Override | ||
| public ReturnCode filterCell(final Cell c) { | ||
| if (filterRow) return ReturnCode.NEXT_ROW; | ||
| if (provideHint) { | ||
| return ReturnCode.SEEK_NEXT_USING_HINT; | ||
| } | ||
| if (filterRow) { | ||
| return ReturnCode.NEXT_ROW; | ||
| } | ||
| return ReturnCode.INCLUDE; | ||
| } | ||
|
|
||
|
|
@@ -105,7 +126,9 @@ public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments | |
| @Override | ||
| public byte[] toByteArray() { | ||
| FilterProtos.PrefixFilter.Builder builder = FilterProtos.PrefixFilter.newBuilder(); | ||
| if (this.prefix != null) builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix)); | ||
| if (this.prefix != null) { | ||
| builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix)); | ||
| } | ||
| return builder.build().toByteArray(); | ||
| } | ||
|
|
||
|
|
@@ -142,6 +165,28 @@ boolean areSerializedFieldsEqual(Filter o) { | |
| return Bytes.equals(this.getPrefix(), other.getPrefix()); | ||
| } | ||
|
|
||
| @Override | ||
PDavid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| public Cell getNextCellHint(Cell cell) { | ||
| if (reversed) { | ||
| return reversedNextCellHint; | ||
| } else { | ||
| // On forward scan hint should be the prefix | ||
| return PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length); | ||
PDavid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
|
|
||
| private byte[] increaseLastNonMaxByte(byte[] bytes) { | ||
| byte[] result = Arrays.copyOf(bytes, bytes.length); | ||
| for (int i = bytes.length - 1; i >= 0; i--) { | ||
| byte b = bytes[i]; | ||
| if (b < Byte.MAX_VALUE) { | ||
| result[i] = (byte) (b + 1); | ||
| break; | ||
| } | ||
| } | ||
| return result; | ||
| } | ||
|
||
|
|
||
| @Override | ||
| public String toString() { | ||
| return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,9 +17,14 @@ | |
| */ | ||
| package org.apache.hadoop.hbase.filter; | ||
|
|
||
| import static org.junit.Assert.*; | ||
| import static org.junit.Assert.assertEquals; | ||
| import static org.junit.Assert.assertFalse; | ||
| import static org.junit.Assert.assertNotNull; | ||
| import static org.junit.Assert.assertTrue; | ||
|
|
||
| import org.apache.hadoop.hbase.Cell; | ||
| import org.apache.hadoop.hbase.HBaseClassTestRule; | ||
| import org.apache.hadoop.hbase.KeyValue; | ||
| import org.apache.hadoop.hbase.KeyValueUtil; | ||
| import org.apache.hadoop.hbase.testclassification.FilterTests; | ||
| import org.apache.hadoop.hbase.testclassification.SmallTests; | ||
|
|
@@ -40,7 +45,6 @@ public class TestPrefixFilter { | |
| static final char FIRST_CHAR = 'a'; | ||
| static final char LAST_CHAR = 'e'; | ||
| static final String HOST_PREFIX = "org.apache.site-"; | ||
| static final byte[] GOOD_BYTES = Bytes.toBytes("abc"); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is also a bit unrelated - an unused constant field. |
||
|
|
||
| @Before | ||
| public void setUp() throws Exception { | ||
|
|
@@ -54,7 +58,7 @@ public void testPrefixOnRow() throws Exception { | |
|
|
||
| @Test | ||
| public void testPrefixOnRowInsideWhileMatchRow() throws Exception { | ||
| prefixRowTests(new WhileMatchFilter(this.mainFilter), true); | ||
| prefixRowTests(new WhileMatchFilter(this.mainFilter)); | ||
| } | ||
|
|
||
| @Test | ||
|
|
@@ -70,10 +74,6 @@ public void testSerialization() throws Exception { | |
| } | ||
|
|
||
| private void prefixRowTests(Filter filter) throws Exception { | ||
| prefixRowTests(filter, false); | ||
| } | ||
|
|
||
| private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throws Exception { | ||
| for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) { | ||
| byte[] t = createRow(c); | ||
| assertFalse("Failed with character " + c, | ||
|
|
@@ -82,13 +82,153 @@ private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throw | |
| } | ||
| String yahooSite = "com.yahoo.www"; | ||
| byte[] yahooSiteBytes = Bytes.toBytes(yahooSite); | ||
| assertTrue("Failed with character " + yahooSite, | ||
| assertFalse("Failed with character " + yahooSite, | ||
| filter.filterRowKey(KeyValueUtil.createFirstOnRow(yahooSiteBytes))); | ||
| assertEquals(filter.filterAllRemaining(), lastFilterAllRemaining); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| } | ||
|
|
||
| private byte[] createRow(final char c) { | ||
| return Bytes.toBytes(HOST_PREFIX + Character.toString(c)); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldProvideHintWhenKeyBefore() { | ||
| byte[] prefix = Bytes.toBytes("gg"); | ||
| PrefixFilter filter = new PrefixFilter(prefix); | ||
|
|
||
| KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); | ||
|
|
||
| // Should include this row so that filterCell() will be invoked. | ||
| assertFalse(filter.filterRowKey(cell)); | ||
| assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); | ||
| Cell actualCellHint = filter.getNextCellHint(cell); | ||
| assertNotNull(actualCellHint); | ||
| Cell expectedCellHint = KeyValueUtil.createFirstOnRow(prefix); | ||
| assertEquals(expectedCellHint, actualCellHint); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| assertTrue(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldProvideHintWhenKeyBeforeAndShorter() { | ||
| byte[] prefix = Bytes.toBytes("gggg"); | ||
| PrefixFilter filter = new PrefixFilter(prefix); | ||
|
|
||
| KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); | ||
|
|
||
| // Should include this row so that filterCell() will be invoked. | ||
| assertFalse(filter.filterRowKey(cell)); | ||
| assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); | ||
| Cell actualCellHint = filter.getNextCellHint(cell); | ||
| assertNotNull(actualCellHint); | ||
| Cell expectedCellHint = KeyValueUtil.createFirstOnRow(prefix); | ||
| assertEquals(expectedCellHint, actualCellHint); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| assertTrue(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldIncludeWhenKeyMatches() { | ||
| PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg")); | ||
|
|
||
| KeyValue matchingCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("gg")); | ||
|
|
||
| assertFalse(filter.filterRowKey(matchingCell)); | ||
| assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(matchingCell)); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| assertFalse(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldReturnNextRowWhenKeyAfter() { | ||
| PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg")); | ||
|
|
||
| KeyValue afterCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pp")); | ||
|
|
||
| assertTrue(filter.filterRowKey(afterCell)); | ||
| assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(afterCell)); | ||
| assertTrue(filter.filterAllRemaining()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This API is so awkward... |
||
| assertTrue(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldProvideHintWhenKeyBeforeReversed() { | ||
| PrefixFilter filter = new PrefixFilter(Bytes.toBytes("aa")); | ||
| filter.setReversed(true); | ||
|
|
||
| KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x")); | ||
|
|
||
| // Should include this row so that filterCell() will be invoked. | ||
| assertFalse(filter.filterRowKey(cell)); | ||
| assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); | ||
| Cell actualCellHint = filter.getNextCellHint(cell); | ||
| assertNotNull(actualCellHint); | ||
| Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes("ab")); | ||
| assertEquals(expectedCellHint, actualCellHint); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| assertTrue(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void hintShouldIncreaseLastNonMaxByteWhenReversed() { | ||
| PrefixFilter filter = new PrefixFilter(new byte[] { 'a', 'a', Byte.MAX_VALUE }); | ||
| filter.setReversed(true); | ||
|
|
||
| KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x")); | ||
|
|
||
| // Should include this row so that filterCell() will be invoked. | ||
| assertFalse(filter.filterRowKey(cell)); | ||
| assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(cell)); | ||
| Cell actualCellHint = filter.getNextCellHint(cell); | ||
| assertNotNull(actualCellHint); | ||
| Cell expectedCellHint = KeyValueUtil.createFirstOnRow(new byte[] { 'a', 'b', Byte.MAX_VALUE }); | ||
| assertEquals(expectedCellHint, actualCellHint); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| assertTrue(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldIncludeWhenKeyMatchesReversed() { | ||
| PrefixFilter filter = new PrefixFilter(Bytes.toBytes("aa")); | ||
| filter.setReversed(true); | ||
|
|
||
| KeyValue matchingCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); | ||
|
|
||
| assertFalse(filter.filterRowKey(matchingCell)); | ||
| assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(matchingCell)); | ||
| assertFalse(filter.filterAllRemaining()); | ||
| assertFalse(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldReturnNextRowWhenKeyAfterReversed() { | ||
| PrefixFilter filter = new PrefixFilter(Bytes.toBytes("dd")); | ||
| filter.setReversed(true); | ||
|
|
||
| KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa")); | ||
|
|
||
| assertTrue(filter.filterRowKey(cell)); | ||
| assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(cell)); | ||
| assertTrue(filter.filterAllRemaining()); | ||
| assertTrue(filter.filterRow()); | ||
| } | ||
|
|
||
| @Test | ||
| public void hintShouldNotIncreaseMaxBytesWhenReversed() { | ||
| PrefixFilter filter = | ||
| new PrefixFilter(new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE }); | ||
| filter.setReversed(true); | ||
|
|
||
| KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x")); | ||
|
|
||
| assertTrue(filter.filterRowKey(cell)); | ||
| assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(cell)); | ||
| Cell actualCellHint = filter.getNextCellHint(cell); | ||
| assertNotNull(actualCellHint); | ||
| Cell expectedCellHint = | ||
| KeyValueUtil.createFirstOnRow(new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE }); | ||
| assertEquals(expectedCellHint, actualCellHint); | ||
| assertTrue(filter.filterAllRemaining()); | ||
| assertTrue(filter.filterRow()); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.