Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ Optimizations

* GITHUB#15024: Improve prefix sum computation in Lucene99HnswVectorsReader for faster neighbor decoding. (Luis Negrin)

* GITHUB#15779: Improve BytesRefHash.add performance by optimizing the rehash operation. (tyronecai)

Bug Fixes
---------------------
* GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent
Expand Down
55 changes: 26 additions & 29 deletions lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -433,40 +433,37 @@ public int addByPoolOffset(int offset) {
/**
 * Rebuilds the {@code ids} hash table with {@code newSize} slots and updates the cached
 * mask and size fields to match.
 *
 * <p>NOTE(review): this span is a diff hunk rendered without +/- markers, so lines from the
 * previous body and from the revised body appear back to back. It is not compilable as
 * shown; confirm which lines survive against the actual file before relying on it.
 *
 * @param newSize number of slots in the rebuilt table; the probe mask is computed as
 *     {@code newSize - 1}, so presumably a power of two (TODO confirm against callers)
 * @param hashOnData when {@code true}, each entry's hash is recomputed via
 *     {@code pool.hash(bytesStart[...])} and its high bits are stored in the slot; when
 *     {@code false}, {@code bytesStart[...]} itself seeds the probe and no hash bits are stored
 */
private void rehash(final int newSize, boolean hashOnData) {
final int newMask = newSize - 1;
final int newHighMask = ~newMask;
// NOTE(review): the lines from here through the newHash assignment appear to be the
// pre-change body. It accounts for the full new table up front and fills a separate
// newHash array while the old ids array is still alive.
bytesUsed.addAndGet(Integer.BYTES * (long) newSize);
final int[] newHash = new int[newSize];
Arrays.fill(newHash, -1);
// Scans every slot of the current table and re-inserts only the occupied ones (-1 = empty).
for (int i = 0; i < hashSize; i++) {
int e0 = ids[i];
if (e0 != -1) {
// Keep only the bits under hashMask (the id); the remaining bits carry stored hash bits.
e0 &= hashMask;
final int hashcode;
int code;
if (hashOnData) {
hashcode = code = pool.hash(bytesStart[e0]);
} else {
code = bytesStart[e0];
hashcode = 0;
}

int hashPos = code & newMask;
assert hashPos >= 0;

// Conflict; use linear probe to find an open slot
// (see LUCENE-5604):
while (newHash[hashPos] != -1) {
code++;
hashPos = code & newMask;
}

newHash[hashPos] = e0 | (hashcode & newHighMask);
// NOTE(review): the lines from here through the ids[hashPos] assignment appear to be the
// post-change body. It accounts only for the size difference between the new and old
// tables, then replaces ids directly instead of building a temporary newHash.
bytesUsed.addAndGet(Integer.BYTES * (long) (newSize - ids.length));

ids = new int[newSize];
Arrays.fill(ids, -1);

// rebuild ids from terms in pool pointed by bytesStart
// Iterates ids 0..count-1 directly rather than scanning the slots of the old table.
for (int id = 0; id < count; id++) {
final int hashcode;
int code;
if (hashOnData) {
hashcode = code = pool.hash(bytesStart[id]);
} else {
code = bytesStart[id];
hashcode = 0;
}

int hashPos = code & newMask;
assert hashPos >= 0;

// Conflict; use linear probe to find an open slot
// (see LUCENE-5604):
while (ids[hashPos] != -1) {
code++;
hashPos = code & newMask;
}

// The slot stores the id OR-ed with the hash bits that lie above newMask.
ids[hashPos] = id | (hashcode & newHighMask);
}

hashMask = newMask;
highMask = newHighMask;
// NOTE(review): the next two lines appear to belong to the pre-change body: they subtract
// the old table's size from the accounting and then swap in newHash.
bytesUsed.addAndGet(Integer.BYTES * (long) -ids.length);
ids = newHash;
hashSize = newSize;
hashHalfSize = newSize / 2;
}
Expand Down
Loading