Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
package org.apache.hadoop.hbase.filter;

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
Expand All @@ -35,13 +37,24 @@
* Pass results that have same row prefix.
*/
@InterfaceAudience.Public
public class PrefixFilter extends FilterBase {
public class PrefixFilter extends FilterBase implements HintingFilter {
protected byte[] prefix = null;
protected boolean passedPrefix = false;
protected boolean filterRow = true;
protected boolean provideHint = false;
protected Cell reversedNextCellHint;

/**
 * Creates a filter that passes rows whose key starts with the given prefix.
 * @param prefix row-key prefix to match; may be {@code null}, in which case
 *               {@link #filterRowKey(Cell)} filters out every row
 */
public PrefixFilter(final byte[] prefix) {
  this.prefix = prefix;
  // Pre-compute reverse hint at creation to avoid re-computing it several times in the corner
  // case where there are a lot of cells between the hint and the first real match.
  // A null prefix is tolerated elsewhere in this class (filterRowKey and toByteArray both check
  // for it), so it must not blow up here either: without a prefix there is no hint to compute.
  this.reversedNextCellHint = prefix == null ? null : createReversedNextCellHint();
}

/**
 * Builds the seek hint used by reversed scans: the first possible cell on the row obtained by
 * passing the prefix through {@link #increaseLastNonMaxByte(byte[])}.
 */
private Cell createReversedNextCellHint() {
  // For a reversed scan the hint row is the prefix with its last incrementable byte bumped by one.
  final byte[] hintRow = increaseLastNonMaxByte(this.prefix);
  final short hintRowLength = (short) hintRow.length;
  return PrivateCellUtil.createFirstOnRow(hintRow, 0, hintRowLength);
}

public byte[] getPrefix() {
Expand All @@ -50,10 +63,12 @@ public byte[] getPrefix() {

@Override
public boolean filterRowKey(Cell firstRowCell) {
if (firstRowCell == null || this.prefix == null) return true;
if (filterAllRemaining()) return true;
int length = firstRowCell.getRowLength();
if (length < prefix.length) return true;
if (firstRowCell == null || this.prefix == null) {
return true;
}
if (filterAllRemaining()) {
return true;
}
// if they are equal, return false => pass row
// else return true, filter row
// if we are passed the prefix, set flag
Expand All @@ -70,12 +85,18 @@ public boolean filterRowKey(Cell firstRowCell) {
passedPrefix = true;
}
filterRow = (cmp != 0);
return filterRow;
provideHint = (!isReversed() && cmp < 0) || (isReversed() && cmp > 0);
return passedPrefix;
}

@Override
public ReturnCode filterCell(final Cell c) {
if (filterRow) return ReturnCode.NEXT_ROW;
if (provideHint) {
return ReturnCode.SEEK_NEXT_USING_HINT;
}
if (filterRow) {
return ReturnCode.NEXT_ROW;
}
return ReturnCode.INCLUDE;
}

Expand Down Expand Up @@ -105,7 +126,9 @@ public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments
@Override
public byte[] toByteArray() {
FilterProtos.PrefixFilter.Builder builder = FilterProtos.PrefixFilter.newBuilder();
if (this.prefix != null) builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix));
if (this.prefix != null) {
builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix));
}
return builder.build().toByteArray();
}

Expand Down Expand Up @@ -142,6 +165,28 @@ boolean areSerializedFieldsEqual(Filter o) {
return Bytes.equals(this.getPrefix(), other.getPrefix());
}

/**
 * Returns the cell to seek to when this filter answers
 * {@link ReturnCode#SEEK_NEXT_USING_HINT}. The {@code cell} argument is not consulted.
 */
@Override
public Cell getNextCellHint(Cell cell) {
  // Reversed scans reuse the hint stored in reversedNextCellHint; forward scans are pointed at
  // the first possible cell of the row equal to the prefix itself.
  return reversed
    ? reversedNextCellHint
    : PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length);
}

/**
 * Returns a copy of {@code bytes} in which the right-most incrementable byte has been increased
 * by one. Bytes to the right of the incremented position are left untouched. If no byte can be
 * incremented the copy is returned unchanged.
 * <p>
 * A byte is considered non-incrementable when it is {@link Byte#MAX_VALUE} (0x7F) or 0xFF.
 * @param bytes source array; never modified
 * @return a new array of the same length as {@code bytes}
 */
private byte[] increaseLastNonMaxByte(byte[] bytes) {
  byte[] result = Arrays.copyOf(bytes, bytes.length);
  for (int i = bytes.length - 1; i >= 0; i--) {
    byte b = bytes[i];
    // Java bytes are signed, so 0xFF is -1 and would pass a plain "b < Byte.MAX_VALUE" check;
    // incrementing it wraps around to 0x00, which yields a key that is SMALLER than the input
    // under unsigned byte ordering and therefore is not a usable upper-bound hint.
    // Skip 0xFF as well and carry on to the next byte on the left.
    // NOTE(review): 0x7F is still skipped to keep the behaviour pinned by the existing tests;
    // under unsigned ordering it could be incremented to 0x80 for a tighter hint - confirm.
    if (b != Byte.MAX_VALUE && b != (byte) 0xFF) {
      result[i] = (byte) (b + 1);
      break;
    }
  }
  return result;
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, I have not yet found an existing util method that does the same thing.
For example, I tried PrivateCellUtil.createFirstOnNextRow(Cell), which is similar but not the same (the tests fail if it is used).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's OK.
We can keep this here, or move it to PrivateCellUtil before commit.


@Override
public String toString() {
return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@
*/
package org.apache.hadoop.hbase.filter;

import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.testclassification.FilterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
Expand All @@ -40,7 +45,6 @@ public class TestPrefixFilter {
static final char FIRST_CHAR = 'a';
static final char LAST_CHAR = 'e';
static final String HOST_PREFIX = "org.apache.site-";
static final byte[] GOOD_BYTES = Bytes.toBytes("abc");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also a bit unrelated - an unused constant field.


@Before
public void setUp() throws Exception {
Expand All @@ -54,7 +58,7 @@ public void testPrefixOnRow() throws Exception {

@Test
public void testPrefixOnRowInsideWhileMatchRow() throws Exception {
prefixRowTests(new WhileMatchFilter(this.mainFilter), true);
prefixRowTests(new WhileMatchFilter(this.mainFilter));
}

@Test
Expand All @@ -70,10 +74,6 @@ public void testSerialization() throws Exception {
}

private void prefixRowTests(Filter filter) throws Exception {
prefixRowTests(filter, false);
}

private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throws Exception {
for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
byte[] t = createRow(c);
assertFalse("Failed with character " + c,
Expand All @@ -82,13 +82,153 @@ private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throw
}
String yahooSite = "com.yahoo.www";
byte[] yahooSiteBytes = Bytes.toBytes(yahooSite);
assertTrue("Failed with character " + yahooSite,
assertFalse("Failed with character " + yahooSite,
filter.filterRowKey(KeyValueUtil.createFirstOnRow(yahooSiteBytes)));
assertEquals(filter.filterAllRemaining(), lastFilterAllRemaining);
assertFalse(filter.filterAllRemaining());
}

/** Builds a row key made of {@code HOST_PREFIX} followed by the given character. */
private byte[] createRow(final char c) {
  final String rowKey = HOST_PREFIX + c;
  return Bytes.toBytes(rowKey);
}

/** Forward scan, row "aa" sorts before prefix "gg": expect a seek hint pointing at the prefix. */
@Test
public void shouldProvideHintWhenKeyBefore() {
  final byte[] prefixBytes = Bytes.toBytes("gg");
  final PrefixFilter prefixFilter = new PrefixFilter(prefixBytes);
  final KeyValue rowBeforePrefix = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));

  // The row key itself must not be filtered, otherwise filterCell() would never be reached.
  assertFalse(prefixFilter.filterRowKey(rowBeforePrefix));
  assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, prefixFilter.filterCell(rowBeforePrefix));

  final Cell hint = prefixFilter.getNextCellHint(rowBeforePrefix);
  assertNotNull(hint);
  assertEquals(KeyValueUtil.createFirstOnRow(prefixBytes), hint);

  assertFalse(prefixFilter.filterAllRemaining());
  assertTrue(prefixFilter.filterRow());
}

/**
 * Forward scan, row "aa" is both shorter than and sorted before prefix "gggg": expect a seek hint
 * pointing at the prefix.
 */
@Test
public void shouldProvideHintWhenKeyBeforeAndShorter() {
  final byte[] prefixBytes = Bytes.toBytes("gggg");
  final PrefixFilter prefixFilter = new PrefixFilter(prefixBytes);
  final KeyValue shortRowBeforePrefix = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));

  // The row key itself must not be filtered, otherwise filterCell() would never be reached.
  assertFalse(prefixFilter.filterRowKey(shortRowBeforePrefix));
  assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT,
    prefixFilter.filterCell(shortRowBeforePrefix));

  final Cell hint = prefixFilter.getNextCellHint(shortRowBeforePrefix);
  assertNotNull(hint);
  assertEquals(KeyValueUtil.createFirstOnRow(prefixBytes), hint);

  assertFalse(prefixFilter.filterAllRemaining());
  assertTrue(prefixFilter.filterRow());
}

/** Forward scan, row equal to the prefix: the row and its cell must be included. */
@Test
public void shouldIncludeWhenKeyMatches() {
  final byte[] rowAndPrefix = Bytes.toBytes("gg");
  final PrefixFilter prefixFilter = new PrefixFilter(rowAndPrefix);
  final KeyValue matching = KeyValueUtil.createFirstOnRow(Bytes.toBytes("gg"));

  assertFalse(prefixFilter.filterRowKey(matching));
  assertEquals(Filter.ReturnCode.INCLUDE, prefixFilter.filterCell(matching));

  assertFalse(prefixFilter.filterAllRemaining());
  assertFalse(prefixFilter.filterRow());
}

@Test
public void shouldReturnNextRowWhenKeyAfter() {
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("gg"));

KeyValue afterCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pp"));

assertTrue(filter.filterRowKey(afterCell));
assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(afterCell));
assertTrue(filter.filterAllRemaining());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This API is so awkward...

assertTrue(filter.filterRow());
}

/**
 * Reversed scan, row "x" has not yet reached prefix "aa": expect a seek hint on row "ab", i.e.
 * the prefix with its last byte incremented.
 */
@Test
public void shouldProvideHintWhenKeyBeforeReversed() {
  final PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("aa"));
  prefixFilter.setReversed(true);
  final KeyValue rowNotYetAtPrefix = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x"));

  // The row key itself must not be filtered, otherwise filterCell() would never be reached.
  assertFalse(prefixFilter.filterRowKey(rowNotYetAtPrefix));
  assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT,
    prefixFilter.filterCell(rowNotYetAtPrefix));

  final Cell hint = prefixFilter.getNextCellHint(rowNotYetAtPrefix);
  assertNotNull(hint);
  assertEquals(KeyValueUtil.createFirstOnRow(Bytes.toBytes("ab")), hint);

  assertFalse(prefixFilter.filterAllRemaining());
  assertTrue(prefixFilter.filterRow());
}

/**
 * Reversed scan with a prefix ending in {@code Byte.MAX_VALUE}: the hint must increment the
 * preceding byte and keep the trailing {@code Byte.MAX_VALUE} as is.
 */
@Test
public void hintShouldIncreaseLastNonMaxByteWhenReversed() {
  final byte[] prefixBytes = new byte[] { 'a', 'a', Byte.MAX_VALUE };
  final PrefixFilter prefixFilter = new PrefixFilter(prefixBytes);
  prefixFilter.setReversed(true);
  final KeyValue rowNotYetAtPrefix = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x"));

  // The row key itself must not be filtered, otherwise filterCell() would never be reached.
  assertFalse(prefixFilter.filterRowKey(rowNotYetAtPrefix));
  assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT,
    prefixFilter.filterCell(rowNotYetAtPrefix));

  final Cell hint = prefixFilter.getNextCellHint(rowNotYetAtPrefix);
  assertNotNull(hint);
  final byte[] expectedHintRow = new byte[] { 'a', 'b', Byte.MAX_VALUE };
  assertEquals(KeyValueUtil.createFirstOnRow(expectedHintRow), hint);

  assertFalse(prefixFilter.filterAllRemaining());
  assertTrue(prefixFilter.filterRow());
}

/** Reversed scan, row equal to the prefix: the row and its cell must be included. */
@Test
public void shouldIncludeWhenKeyMatchesReversed() {
  final PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("aa"));
  prefixFilter.setReversed(true);
  final KeyValue matching = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));

  assertFalse(prefixFilter.filterRowKey(matching));
  assertEquals(Filter.ReturnCode.INCLUDE, prefixFilter.filterCell(matching));

  assertFalse(prefixFilter.filterAllRemaining());
  assertFalse(prefixFilter.filterRow());
}

/**
 * Reversed scan, row "aa" is already past prefix "dd": the row is filtered and the filter
 * reports that all remaining rows can be skipped.
 */
@Test
public void shouldReturnNextRowWhenKeyAfterReversed() {
  final PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("dd"));
  prefixFilter.setReversed(true);
  final KeyValue rowPastPrefix = KeyValueUtil.createFirstOnRow(Bytes.toBytes("aa"));

  assertTrue(prefixFilter.filterRowKey(rowPastPrefix));
  assertEquals(Filter.ReturnCode.NEXT_ROW, prefixFilter.filterCell(rowPastPrefix));

  assertTrue(prefixFilter.filterAllRemaining());
  assertTrue(prefixFilter.filterRow());
}

/**
 * Reversed scan with a prefix made only of {@code Byte.MAX_VALUE} bytes: the reversed hint is
 * expected to equal the prefix row unchanged, and row "x" is treated as already past the prefix.
 */
@Test
public void hintShouldNotIncreaseMaxBytesWhenReversed() {
  final byte[] allMaxPrefix = new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE };
  final PrefixFilter prefixFilter = new PrefixFilter(allMaxPrefix);
  prefixFilter.setReversed(true);
  final KeyValue rowPastPrefix = KeyValueUtil.createFirstOnRow(Bytes.toBytes("x"));

  assertTrue(prefixFilter.filterRowKey(rowPastPrefix));
  assertEquals(Filter.ReturnCode.NEXT_ROW, prefixFilter.filterCell(rowPastPrefix));

  final Cell hint = prefixFilter.getNextCellHint(rowPastPrefix);
  assertNotNull(hint);
  final byte[] expectedHintRow = new byte[] { Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE };
  assertEquals(KeyValueUtil.createFirstOnRow(expectedHintRow), hint);

  assertTrue(prefixFilter.filterAllRemaining());
  assertTrue(prefixFilter.filterRow());
}
}