diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f061ce8ddd7f..9d739e1366b5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -235,6 +235,8 @@ Optimizations * GITHUB#15729: Lucene90DocValuesProducer is not prefetching any data for DocValueSkippers anymore (Alexander Reelsen) +* GITHUB#15772: Improve BytesRefHash.sort performance by rearranging ids. (tyronecai) + * GITHUB#15742: Optimize int4 dotProduct and squareDistance computations by replacing vector conversions with reinterpret casting + bit manipulation. (Trevor McCulloch, Kaival Parikh) Bug Fixes diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java index 6c13273263cf..2e1850344dc9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java @@ -178,18 +178,13 @@ public BytesRef get(int bytesID, BytesRef ref) { */ public int[] compact() { assert bytesStart != null : "bytesStart is null - not initialized"; - int upto = 0; - for (int i = 0; i < hashSize; i++) { - if (ids[i] != -1) { - ids[upto] = ids[i] & hashMask; - if (upto < i) { - ids[i] = -1; - } - upto++; - } + + // id is the sequence number when bytes added to the pool + for (int i = 0; i < count; i++) { + ids[i] = i; } + Arrays.fill(ids, count, hashSize, -1); - assert upto == count; lastCount = count; return ids; }