Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,25 @@ public class PrefixFilter extends FilterBase implements HintingFilter {
protected boolean filterRow = true;
protected boolean provideHint = false;
protected Cell reversedNextCellHint;
protected Cell forwardNextCellHint;

/**
 * Creates the filter for the given prefix and pre-computes the seek hints it hands out.
 *
 * @param prefix the prefix to filter on; may be {@code null}, in which case no hints are
 *               created and both hint fields stay {@code null}
 */
public PrefixFilter(final byte[] prefix) {
  this.prefix = prefix;
  // Pre-compute hints at creation to avoid re-computing them several times in the corner
  // case where there are a lot of cells between the hint and the first real match.
  // createCellHints() assigns both the reversed and the forward hint and returns early on a
  // null prefix, so a separate reversed-hint computation (which would not be null-safe) is
  // not needed here.
  createCellHints();
}

private Cell createReversedNextCellHint() {
private void createCellHints() {
if (prefix == null) {
return;
}
// On reversed scan hint should be the prefix with last byte incremented
byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix);
return PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length);
this.reversedNextCellHint =
Copy link
Copy Markdown
Contributor

@stoty stoty Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was about to say that we could choose the direction here, but interestingly the direction is not available here yet.
Another API awkwardness.

Copy link
Copy Markdown
Contributor Author

@PDavid PDavid Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly. The direction is set with setReversed(), which can only be invoked after the filter has been created.

So the direction is available at creation time, but it can still be changed afterwards.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice, I'm sure the direction is not changed after any of the Cell processing methods have been called.
setReversed() is called once when the Scan is set up (and perhaps also after reset() is called).

PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length);
// On forward scan hint should be the prefix
this.forwardNextCellHint = PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length);
}

public byte[] getPrefix() {
Expand Down Expand Up @@ -115,6 +122,27 @@ public boolean filterAllRemaining() {
return passedPrefix;
}

/**
 * Returns the pre-computed seek hint matching the current scan direction.
 *
 * @param cell the current cell; not used, the hint depends only on the scan direction
 * @return the reversed hint when {@code reversed} is set, otherwise the forward hint
 */
@Override
public Cell getNextCellHint(Cell cell) {
  return reversed ? reversedNextCellHint : forwardNextCellHint;
}

/**
 * Returns a copy of {@code bytes} in which the last byte that can be incremented without
 * overflowing is increased by one. All other bytes are copied unchanged.
 * <p>
 * Bytes equal to {@link Byte#MAX_VALUE} (0x7F) are skipped, as before. Bytes equal to 0xFF are
 * skipped as well: as a signed Java byte 0xFF is -1, so incrementing it would wrap around to
 * 0x00 and yield an array that sorts <em>before</em> the input under unsigned byte ordering
 * instead of after it.
 * <p>
 * If no byte can be incremented (including the empty array) an unchanged copy is returned.
 *
 * @param bytes the source array; it is never modified
 * @return a new array of the same length with at most one byte increased by one
 */
private byte[] increaseLastNonMaxByte(byte[] bytes) {
  byte[] result = Arrays.copyOf(bytes, bytes.length);
  for (int i = bytes.length - 1; i >= 0; i--) {
    byte b = bytes[i];
    // A plain "b < Byte.MAX_VALUE" check lets 0xFF (-1) through and wraps it to 0x00.
    if (b != Byte.MAX_VALUE && b != (byte) 0xFF) {
      result[i] = (byte) (b + 1);
      break;
    }
  }
  return result;
}

public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
Preconditions.checkArgument(filterArguments.size() == 1, "Expected 1 but got: %s",
filterArguments.size());
Expand Down Expand Up @@ -165,28 +193,6 @@ boolean areSerializedFieldsEqual(Filter o) {
return Bytes.equals(this.getPrefix(), other.getPrefix());
}

/**
 * Returns the seek hint for the current scan direction.
 * <p>
 * Both hints are pre-computed when the filter is created, so no cell is allocated per call
 * and a filter created with a {@code null} prefix returns {@code null} instead of throwing.
 *
 * @param cell the current cell; not used, the hint depends only on the scan direction
 * @return the reversed hint when {@code reversed} is set, otherwise the forward hint; may be
 *         {@code null} when no hint was created
 */
@Override
public Cell getNextCellHint(Cell cell) {
  if (reversed) {
    return reversedNextCellHint;
  } else {
    // On forward scan hint should be the prefix; it is cached in forwardNextCellHint.
    return forwardNextCellHint;
  }
}

/**
 * Returns a copy of {@code bytes} in which the last byte that can be incremented without
 * overflowing is increased by one. All other bytes are copied unchanged.
 * <p>
 * Bytes equal to {@link Byte#MAX_VALUE} (0x7F) are skipped, as before. Bytes equal to 0xFF are
 * skipped as well: as a signed Java byte 0xFF is -1, so incrementing it would wrap around to
 * 0x00 and yield an array that sorts <em>before</em> the input under unsigned byte ordering
 * instead of after it.
 * <p>
 * If no byte can be incremented (including the empty array) an unchanged copy is returned.
 *
 * @param bytes the source array; it is never modified
 * @return a new array of the same length with at most one byte increased by one
 */
private byte[] increaseLastNonMaxByte(byte[] bytes) {
  byte[] result = Arrays.copyOf(bytes, bytes.length);
  for (int i = bytes.length - 1; i >= 0; i--) {
    byte b = bytes[i];
    // A plain "b < Byte.MAX_VALUE" check lets 0xFF (-1) through and wraps it to 0x00.
    if (b != Byte.MAX_VALUE && b != (byte) 0xFF) {
      result[i] = (byte) (b + 1);
      break;
    }
  }
  return result;
}

@Override
Comment thread
PDavid marked this conversation as resolved.
Outdated
public String toString() {
return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import org.apache.hadoop.hbase.Cell;
Expand Down Expand Up @@ -58,7 +59,7 @@ public void testPrefixOnRow() throws Exception {

@Test
public void testPrefixOnRowInsideWhileMatchRow() throws Exception {
  // Run the row checks exactly once, expecting filterAllRemaining() to be true at the end.
  // The one-argument overload passes false for that expectation, which contradicts this
  // expectation for the same wrapped filter, so it is not called here.
  prefixRowTests(new WhileMatchFilter(this.mainFilter), true);
}

@Test
Expand All @@ -74,6 +75,10 @@ public void testSerialization() throws Exception {
}

/**
 * Runs the prefix row checks, expecting {@code filterAllRemaining()} to be {@code false}
 * once the last row has been processed.
 */
private void prefixRowTests(Filter filter) throws Exception {
  final boolean expectedFilterAllRemaining = false;
  prefixRowTests(filter, expectedFilterAllRemaining);
}

private void prefixRowTests(Filter filter, boolean lastFilterAllRemaining) throws Exception {
for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
byte[] t = createRow(c);
assertFalse("Failed with character " + c,
Expand All @@ -82,9 +87,10 @@ private void prefixRowTests(Filter filter) throws Exception {
}
String yahooSite = "com.yahoo.www";
byte[] yahooSiteBytes = Bytes.toBytes(yahooSite);
assertFalse("Failed with character " + yahooSite,
filter.filterRowKey(KeyValueUtil.createFirstOnRow(yahooSiteBytes)));
assertFalse(filter.filterAllRemaining());
KeyValue yahooSiteCell = KeyValueUtil.createFirstOnRow(yahooSiteBytes);
assertFalse("Failed with character " + yahooSite, filter.filterRowKey(yahooSiteCell));
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(yahooSiteCell));
assertEquals(lastFilterAllRemaining, filter.filterAllRemaining());
}

private byte[] createRow(final char c) {
Expand Down Expand Up @@ -231,4 +237,32 @@ public void hintShouldNotIncreaseMaxBytesWhenReversed() {
assertTrue(filter.filterAllRemaining());
assertTrue(filter.filterRow());
}

/** A filter built with a null prefix must hand out a null hint in both scan directions. */
@Test
public void shouldNotThrowWhenCreatedWithNullPrefix() {
  KeyValue anyCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("doesNotMatter"));
  PrefixFilter nullPrefixFilter = new PrefixFilter(null);

  // Forward direction (the state right after construction).
  assertNull(nullPrefixFilter.getNextCellHint(anyCell));

  // Reversed direction.
  nullPrefixFilter.setReversed(true);
  assertNull(nullPrefixFilter.getNextCellHint(anyCell));
}

/**
 * A filter built with an empty prefix must hand out a non-null hint in both scan directions,
 * and in both cases the hint equals the first cell on the empty row.
 */
@Test
public void shouldNotThrowWhenCreatedWithEmptyByteArrayPrefix() {
  byte[] noBytes = {};
  KeyValue expectedHint = KeyValueUtil.createFirstOnRow(noBytes);
  KeyValue anyCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("doesNotMatter"));
  PrefixFilter emptyPrefixFilter = new PrefixFilter(noBytes);

  // Forward direction (the state right after construction).
  Cell forwardHint = emptyPrefixFilter.getNextCellHint(anyCell);
  assertNotNull(forwardHint);
  assertEquals(expectedHint, forwardHint);

  // Reversed direction: the expected hint is the same cell as in the forward case.
  emptyPrefixFilter.setReversed(true);
  Cell reversedHint = emptyPrefixFilter.getNextCellHint(anyCell);
  assertNotNull(reversedHint);
  assertEquals(expectedHint, reversedHint);
}
}