Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,12 @@ public class GroupVIntBenchmark {
};

final int maxSize = 256;
final long[] docs = new long[maxSize];
final long[] values = new long[maxSize];
final int[] docs = new int[maxSize];
final int[] values = new int[maxSize];

IndexInput byteBufferGVIntIn;
IndexInput mmapGVIntIn;
IndexInput nioGVIntIn;
IndexInput byteBufferVIntIn;
IndexInput mmapVIntIn;
ByteBuffersDataInput byteBuffersGVIntIn;

ByteArrayDataInput byteArrayVIntIn;
Expand All @@ -103,7 +103,7 @@ public class GroupVIntBenchmark {
@Param({"64"})
public int size;

void initArrayInput(long[] docs) throws Exception {
void initArrayInput(int[] docs) throws Exception {
byte[] gVIntBytes = new byte[Integer.BYTES * maxSize * 2];
byte[] vIntBytes = new byte[Integer.BYTES * maxSize * 2];
ByteArrayDataOutput vIntOut = new ByteArrayDataOutput(vIntBytes);
Expand All @@ -116,21 +116,21 @@ void initArrayInput(long[] docs) throws Exception {
byteArrayGVIntIn = new ByteArrayDataInput(gVIntBytes);
}

void initNioInput(long[] docs) throws Exception {
void initNioInput(int[] docs) throws Exception {
Directory dir = new NIOFSDirectory(Files.createTempDirectory("groupvintdata"));
IndexOutput out = dir.createOutput("gvint", IOContext.DEFAULT);
out.writeGroupVInts(docs, docs.length);
out.close();
nioGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
}

void initByteBuffersInput(long[] docs) throws Exception {
void initByteBuffersInput(int[] docs) throws Exception {
ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
buffer.writeGroupVInts(docs, docs.length);
byteBuffersGVIntIn = buffer.toDataInput();
}

void initByteBufferInput(long[] docs) throws Exception {
void initMMapInput(int[] docs) throws Exception {
Directory dir = new MMapDirectory(Files.createTempDirectory("groupvintdata"));
IndexOutput vintOut = dir.createOutput("vint", IOContext.DEFAULT);
IndexOutput gvintOut = dir.createOutput("gvint", IOContext.DEFAULT);
Expand All @@ -141,11 +141,11 @@ void initByteBufferInput(long[] docs) throws Exception {
}
vintOut.close();
gvintOut.close();
byteBufferGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
byteBufferVIntIn = dir.openInput("vint", IOContext.DEFAULT);
mmapGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
mmapVIntIn = dir.openInput("vint", IOContext.DEFAULT);
}

private void readGroupVIntsBaseline(DataInput in, long[] dst, int limit) throws IOException {
private void readGroupVIntsBaseline(DataInput in, int[] dst, int limit) throws IOException {
int i;
for (i = 0; i <= limit - 4; i += 4) {
GroupVIntUtil.readGroupVInt(in, dst, i);
Expand All @@ -168,32 +168,32 @@ public void init() throws Exception {
}
docs[i] = r.nextInt(1 << (numBits - 1), 1 << numBits);
}
initByteBufferInput(docs);
initMMapInput(docs);
initArrayInput(docs);
initNioInput(docs);
initByteBuffersInput(docs);
}

@Benchmark
public void benchMMapDirectoryInputs_readVInt(Blackhole bh) throws IOException {
byteBufferVIntIn.seek(0);
mmapVIntIn.seek(0);
for (int i = 0; i < size; i++) {
values[i] = byteBufferVIntIn.readVInt();
values[i] = mmapVIntIn.readVInt();
}
bh.consume(values);
}

@Benchmark
public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0);
GroupVIntUtil.readGroupVInts(byteBufferGVIntIn, values, size);
mmapGVIntIn.seek(0);
GroupVIntUtil.readGroupVInts(mmapGVIntIn, values, size);
bh.consume(values);
}

@Benchmark
public void benchMMapDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0);
this.readGroupVIntsBaseline(byteBufferGVIntIn, values, size);
mmapGVIntIn.seek(0);
this.readGroupVIntsBaseline(mmapGVIntIn, values, size);
bh.consume(values);
}

Expand Down
2 changes: 2 additions & 0 deletions lucene/core/src/java/org/apache/lucene/store/DataOutput.java
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,9 @@ public void writeSetOfStrings(Set<String> set) throws IOException {
* @param values the values to write
* @param limit the number of values to write.
* @lucene.experimental
* @deprecated This method is preserved only for backward-compatibility codecs.
*/
@Deprecated
public void writeGroupVInts(long[] values, int limit) throws IOException {
if (groupVIntBytes == null) {
groupVIntBytes = new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP];
Expand Down
166 changes: 64 additions & 102 deletions lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,8 @@ public final class GroupVIntUtil {
// the maximum length of a single group-varint is 4 integers + 1 byte flag.
public static final int MAX_LENGTH_PER_GROUP = 17;

// we use long array instead of int array to make negative integer to be read as positive long.
private static final long[] LONG_MASKS = new long[] {0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL};
private static final int[] INT_MASKS = new int[] {0xFF, 0xFFFF, 0xFFFFFF, ~0};

/**
* Read all the group varints, including the tail vints. we need a long[] because this is what
* postings are using, all longs are actually required to be integers.
*
* @param dst the array to read ints into.
* @param limit the number of int values to read.
* @lucene.experimental
*/
public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException {
int i;
for (i = 0; i <= limit - 4; i += 4) {
readGroupVInt(in, dst, i);
}
for (; i < limit; ++i) {
dst[i] = in.readVInt() & 0xFFFFFFFFL;
}
}

/**
* Read all the group varints, including the tail vints.
*
Expand All @@ -68,28 +48,6 @@ public static void readGroupVInts(DataInput in, int[] dst, int limit) throws IOE
}
}

/**
* Default implementation of read single group, for optimal performance, you should use {@link
* GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead.
*
* @param in the input to use to read data.
* @param dst the array to read ints into.
* @param offset the offset in the array to start storing ints.
*/
public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException {
final int flag = in.readByte() & 0xFF;

final int n1Minus1 = flag >> 6;
final int n2Minus1 = (flag >> 4) & 0x03;
final int n3Minus1 = (flag >> 2) & 0x03;
final int n4Minus1 = flag & 0x03;

dst[offset] = readIntInGroup(in, n1Minus1) & 0xFFFFFFFFL;
dst[offset + 1] = readIntInGroup(in, n2Minus1) & 0xFFFFFFFFL;
dst[offset + 2] = readIntInGroup(in, n3Minus1) & 0xFFFFFFFFL;
dst[offset + 3] = readIntInGroup(in, n4Minus1) & 0xFFFFFFFFL;
}

/**
* Default implementation of read single group, for optimal performance, you should use {@link
* GroupVIntUtil#readGroupVInts(DataInput, int[], int)} instead.
Expand Down Expand Up @@ -134,46 +92,6 @@ public static interface IntReader {
int read(long v);
}

/**
* Faster implementation of read single group, It read values from the buffer that would not cross
* boundaries.
*
* @param in the input to use to read data.
* @param remaining the number of remaining bytes allowed to read for current block/segment.
* @param reader the supplier of read int.
* @param pos the start pos to read from the reader.
* @param dst the array to read ints into.
* @param offset the offset in the array to start storing ints.
* @return the number of bytes read excluding the flag. this indicates the number of positions
* should to be increased for caller, it is 0 or positive number and less than {@link
* #MAX_LENGTH_PER_GROUP}
*/
public static int readGroupVInt(
DataInput in, long remaining, IntReader reader, long pos, long[] dst, int offset)
throws IOException {
if (remaining < MAX_LENGTH_PER_GROUP) {
readGroupVInt(in, dst, offset);
return 0;
}
final int flag = in.readByte() & 0xFF;
final long posStart = ++pos; // exclude the flag bytes, the position has updated via readByte().
final int n1Minus1 = flag >> 6;
final int n2Minus1 = (flag >> 4) & 0x03;
final int n3Minus1 = (flag >> 2) & 0x03;
final int n4Minus1 = flag & 0x03;

// This code path has fewer conditionals and tends to be significantly faster in benchmarks
dst[offset] = reader.read(pos) & LONG_MASKS[n1Minus1];
pos += 1 + n1Minus1;
dst[offset + 1] = reader.read(pos) & LONG_MASKS[n2Minus1];
pos += 1 + n2Minus1;
dst[offset + 2] = reader.read(pos) & LONG_MASKS[n3Minus1];
pos += 1 + n3Minus1;
dst[offset + 3] = reader.read(pos) & LONG_MASKS[n4Minus1];
pos += 1 + n4Minus1;
return (int) (pos - posStart);
}

/**
* Faster implementation of reading a single group. It reads values from the buffer that would
* not cross boundaries.
Expand Down Expand Up @@ -230,69 +148,113 @@ private static int toInt(long value) {
* The implementation for group-varint encoding. It uses a scratch buffer of at most {@link
* #MAX_LENGTH_PER_GROUP} bytes.
*/
public static void writeGroupVInts(DataOutput out, byte[] scratch, long[] values, int limit)
public static void writeGroupVInts(DataOutput out, byte[] scratch, int[] values, int limit)
throws IOException {
int readPos = 0;

// encode each group
while ((limit - readPos) >= 4) {
int writePos = 0;
final int n1Minus1 = numBytes(toInt(values[readPos])) - 1;
final int n2Minus1 = numBytes(toInt(values[readPos + 1])) - 1;
final int n3Minus1 = numBytes(toInt(values[readPos + 2])) - 1;
final int n4Minus1 = numBytes(toInt(values[readPos + 3])) - 1;
final int n1Minus1 = numBytes(values[readPos]) - 1;
final int n2Minus1 = numBytes(values[readPos + 1]) - 1;
final int n3Minus1 = numBytes(values[readPos + 2]) - 1;
final int n4Minus1 = numBytes(values[readPos + 3]) - 1;
int flag = (n1Minus1 << 6) | (n2Minus1 << 4) | (n3Minus1 << 2) | (n4Minus1);
scratch[writePos++] = (byte) flag;
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
writePos += n1Minus1 + 1;
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
writePos += n2Minus1 + 1;
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
writePos += n3Minus1 + 1;
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
writePos += n4Minus1 + 1;

out.writeBytes(scratch, writePos);
}

// tail vints
for (; readPos < limit; readPos++) {
out.writeVInt(toInt(values[readPos]));
out.writeVInt(values[readPos]);
}
}

/**
 * Decodes {@code limit} values into a {@code long[]}. Complete groups of four values are decoded
 * with {@link #readGroupVInt(DataInput, long[], int)}; any values left over after the last
 * complete group are read one at a time as plain vints.
 *
 * <p>Each tail value is masked with {@code 0xFFFFFFFFL}, so it is stored as a non-negative long.
 *
 * @param in the input to read from.
 * @param dst the array to read ints into.
 * @param limit the number of int values to read.
 * @lucene.experimental
 * @deprecated Only for backwards codecs
 */
@Deprecated
public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException {
  // Highest index at which a complete group of four can still start.
  final int lastGroupStart = limit - 4;

  int next = 0;
  while (next <= lastGroupStart) {
    readGroupVInt(in, dst, next);
    next += 4;
  }

  // Tail: fewer than four values remain, each stored as an individual vint.
  while (next < limit) {
    dst[next++] = in.readVInt() & 0xFFFFFFFFL;
  }
}

/**
 * Default implementation for reading a single group of four values. For optimal performance, use
 * {@link GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead.
 *
 * <p>The group starts with one flag byte made of four 2-bit fields. Each field is passed to
 * {@code readIntInGroup} as the "length minus one" of the corresponding value, with the first
 * value's field in the two highest bits. Every decoded value is masked with {@code 0xFFFFFFFFL}
 * before being stored, so it is written to {@code dst} as a non-negative long.
 *
 * @param in the input to use to read data.
 * @param dst the array to read ints into.
 * @param offset the offset in the array to start storing ints.
 * @deprecated Only for backwards codecs
 */
@Deprecated
public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException {
  final int header = in.readByte() & 0xFF;

  // Walk the four 2-bit length fields from the most significant pair down to the least.
  for (int slot = 0, shift = 6; slot < 4; slot++, shift -= 2) {
    final int lengthMinus1 = (header >> shift) & 0x03;
    dst[offset + slot] = readIntInGroup(in, lengthMinus1) & 0xFFFFFFFFL;
  }
}

/**
* The implementation for group-varint encoding. It uses a scratch buffer of at most {@link
* #MAX_LENGTH_PER_GROUP} bytes.
*
* @deprecated Only for backwards codecs
*/
public static void writeGroupVInts(DataOutput out, byte[] scratch, int[] values, int limit)
@Deprecated
public static void writeGroupVInts(DataOutput out, byte[] scratch, long[] values, int limit)
throws IOException {
int readPos = 0;

// encode each group
while ((limit - readPos) >= 4) {
int writePos = 0;
final int n1Minus1 = numBytes(values[readPos]) - 1;
final int n2Minus1 = numBytes(values[readPos + 1]) - 1;
final int n3Minus1 = numBytes(values[readPos + 2]) - 1;
final int n4Minus1 = numBytes(values[readPos + 3]) - 1;
final int n1Minus1 = numBytes(toInt(values[readPos])) - 1;
final int n2Minus1 = numBytes(toInt(values[readPos + 1])) - 1;
final int n3Minus1 = numBytes(toInt(values[readPos + 2])) - 1;
final int n4Minus1 = numBytes(toInt(values[readPos + 3])) - 1;
int flag = (n1Minus1 << 6) | (n2Minus1 << 4) | (n3Minus1 << 2) | (n4Minus1);
scratch[writePos++] = (byte) flag;
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
writePos += n1Minus1 + 1;
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
writePos += n2Minus1 + 1;
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
writePos += n3Minus1 + 1;
BitUtil.VH_LE_INT.set(scratch, writePos, values[readPos++]);
BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++]));
writePos += n4Minus1 + 1;

out.writeBytes(scratch, writePos);
}

// tail vints
for (; readPos < limit; readPos++) {
out.writeVInt(values[readPos]);
out.writeVInt(toInt(values[readPos]));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public void testGroupVIntMultiBlocks() throws IOException {
public void testCloneClose() throws Exception {
Directory dir = getDirectory(createTempDir("testCloneClose"));
IndexOutput io = dir.createOutput("bytes", newIOContext(random()));
final long[] values = new long[] {0, 7, 11, 9};
final int[] values = new int[] {0, 7, 11, 9};
io.writeVInt(5);
io.writeGroupVInts(values, values.length);
io.close();
Expand Down Expand Up @@ -89,7 +89,7 @@ public void testCloneClose() throws Exception {
public void testCloneSliceClose() throws Exception {
Directory dir = getDirectory(createTempDir("testCloneSliceClose"));
IndexOutput io = dir.createOutput("bytes", newIOContext(random()));
final long[] values = new long[] {0, 7, 11, 9};
final int[] values = new int[] {0, 7, 11, 9};
io.writeInt(1);
io.writeInt(2);
io.writeGroupVInts(values, values.length); // will write 5 bytes
Expand Down
Loading
Loading