Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ Optimizations

* GITHUB#14447: Compute the doc range more efficiently when flushing doc block. (Pan Guixin)

* GITHUB#14527: Reduce NeighborArray heap memory. (weizijun)
* GITHUB#14527, GITHUB#14765: Reduce NeighborArray heap memory. (weizijun, Ben Trent)

* GITHUB#14529, GITHUB#14555, GITHUB#14618: Impl intoBitset for IndexedDISI and Docvalues. (Guo Feng)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,6 @@ void finish() throws IOException {
// see: https://github.com/apache/lucene/issues/14214
// connectComponents();
frozen = true;
hnsw.finishBuild();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, why remove this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mikemccand it's not used. It was only added for the RAM estimate added in the previous PR. Since we are doing incremental updates, I just removed it.

}

@SuppressWarnings("unused")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.function.LongConsumer;
import org.apache.lucene.internal.hppc.MaxSizedFloatArrayList;
import org.apache.lucene.internal.hppc.MaxSizedIntArrayList;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

/**
Expand All @@ -32,7 +32,7 @@
*
* @lucene.internal
*/
public class NeighborArray implements Accountable {
public class NeighborArray {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(NeighborArray.class);

Expand All @@ -42,12 +42,23 @@ public class NeighborArray implements Accountable {
private final MaxSizedFloatArrayList scores;
private final MaxSizedIntArrayList nodes;
private int sortedNodeSize;
private long ramBytesUsed = BASE_RAM_BYTES_USED;
private final LongConsumer onHeapMemoryUsageListener;

/**
 * Creates a NeighborArray with no on-heap memory usage listener; allocation growth is not
 * reported to any external accounting.
 *
 * @param maxSize maximum number of neighbors this array may hold
 * @param descOrder true if scores are maintained in descending order
 */
public NeighborArray(int maxSize, boolean descOrder) {
this(maxSize, descOrder, null);
}

/**
 * Creates a NeighborArray.
 *
 * @param maxSize maximum number of neighbors this array may hold
 * @param descOrder true if scores are maintained in descending order
 * @param onHeapMemoryUsageListener optional callback receiving heap-byte deltas as the backing
 *     arrays are allocated and grown; may be null to disable tracking
 */
public NeighborArray(int maxSize, boolean descOrder, LongConsumer onHeapMemoryUsageListener) {
this.maxSize = maxSize;
this.scoresDescOrder = descOrder;
this.onHeapMemoryUsageListener = onHeapMemoryUsageListener;
// backing lists start small (maxSize / 8) and may grow up to maxSize entries
nodes = new MaxSizedIntArrayList(maxSize, maxSize / 8);
scores = new MaxSizedFloatArrayList(maxSize, maxSize / 8);
this.ramBytesUsed += nodes.ramBytesUsed() + scores.ramBytesUsed();
// report the initial allocation right away so the caller's accounting starts out accurate
if (onHeapMemoryUsageListener != null) {
onHeapMemoryUsageListener.accept(ramBytesUsed);
}
}

/**
Expand All @@ -68,8 +79,10 @@ public void addInOrder(int newNode, float newScore) {
+ " to "
+ Arrays.toString(scores.toArray());
}
int previousLength = nodes.buffer.length;
nodes.add(newNode);
scores.add(newScore);
alertOnHeapMemoryUsageChange(nodes.buffer.length, previousLength);
++size;
++sortedNodeSize;
}
Expand All @@ -79,12 +92,21 @@ public void addOutOfOrder(int newNode, float newScore) {
if (size == maxSize) {
throw new IllegalStateException("No growth is allowed");
}

int previousLength = nodes.buffer.length;
nodes.add(newNode);
scores.add(newScore);
alertOnHeapMemoryUsageChange(nodes.buffer.length, previousLength);
size++;
}

/**
 * Notifies the listener, if any, of the extra heap bytes consumed when the backing buffers grew.
 * A no-op when the buffers did not grow or no listener was registered.
 *
 * @param newLength buffer length after the add
 * @param previousLength buffer length before the add
 */
private void alertOnHeapMemoryUsageChange(int newLength, int previousLength) {
if (onHeapMemoryUsageListener == null || newLength <= previousLength) {
return;
}
long grownSlots = newLength - previousLength;
// each grown slot costs one int (node id) in nodes plus one float (score) in scores
onHeapMemoryUsageListener.accept(grownSlots * (Integer.BYTES + Float.BYTES));
}

/**
* In addition to {@link #addOutOfOrder(int, float)}, this function will also remove the
* least-diverse node if the node array is full after insertion
Expand Down Expand Up @@ -313,9 +335,4 @@ private boolean isWorstNonDiverse(
/** Returns the maximum number of neighbors this array is allowed to hold. */
public int maxSize() {
return this.maxSize;
}

@Override
public long ramBytesUsed() {
// shallow instance size plus the two growable backing arrays (nodes and scores)
long bytes = BASE_RAM_BYTES_USED;
bytes += nodes.ramBytesUsed();
bytes += scores.ramBytesUsed();
return bytes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
*/
public final class OnHeapHnswGraph extends HnswGraph implements Accountable {

// shallow estimate of the statically used on-heap memory.
private static final long RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(OnHeapHnswGraph.class);

private static final int INIT_SIZE = 128;

private final AtomicReference<EntryNode> entryNode;
Expand Down Expand Up @@ -83,6 +87,7 @@ public final class OnHeapHnswGraph extends HnswGraph implements Accountable {
numNodes = INIT_SIZE;
}
this.graph = new NeighborArray[numNodes][];
this.graphRamBytesUsed = RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(graph);
}

/**
Expand Down Expand Up @@ -158,21 +163,28 @@ public void addNode(int level, int node) {
size.incrementAndGet();
}
if (level == 0) {
graph[node][level] = new NeighborArray(nsize0, true);
graph[node][level] =
new NeighborArray(
nsize0,
true,
l -> {
assert l > 0;
long bytesUsed = graphRamBytesUsed;
graphRamBytesUsed = bytesUsed + l;
});
} else {
graph[node][level] = new NeighborArray(nsize, true);
graph[node][level] =
new NeighborArray(
nsize,
true,
l -> {
assert l > 0;
long bytesUsed = graphRamBytesUsed;
graphRamBytesUsed = bytesUsed + l;
});
nonZeroLevelSize.incrementAndGet();
}
maxNodeId.accumulateAndGet(node, Math::max);
// update graphRamBytesUsed every 1000 nodes
if (level == 0 && node % 1000 == 0) {
updateGraphRamBytesUsed();
}
}

/**
 * Finish building the graph. Recomputes the full heap-usage estimate once at the end of
 * construction, since incremental updates during {@code addNode} are only sampled periodically.
 */
public void finishBuild() {
updateGraphRamBytesUsed();
}

@Override
Expand Down Expand Up @@ -303,48 +315,14 @@ private void generateLevelToNodes() {
lastFreezeSize = size();
}

/**
 * Recomputes {@code graphRamBytesUsed}: the estimated heap bytes consumed by the graph's
 * per-node {@link NeighborArray} storage. Walks every allocated node and level, summing each
 * NeighborArray's own estimate plus per-node/array header overhead.
 */
public void updateGraphRamBytesUsed() {
long estimate = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // outer graph array header
for (NeighborArray[] levels : graph) {
if (levels == null) {
// node slot not yet populated
continue;
}
for (NeighborArray neighbors : levels) {
if (neighbors != null) {
estimate += neighbors.ramBytesUsed();
}
}
// header of this node's per-level array
estimate += RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
}
graphRamBytesUsed = estimate;
}

/**
* Provides an estimate of the current on-heap memory usage of the graph. This is not threadsafe,
* meaning the heap utilization if building the graph concurrently may be inaccurate. The main
* purpose of this method is during initial document indexing and flush.
*/
@Override
public long ramBytesUsed() {
long total = graphRamBytesUsed; // all NeighborArray
total += 4 * Integer.BYTES; // all int fields
total += 1; // field: noGrowth
total +=
RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 2 * Integer.BYTES; // field: entryNode
total += 3L * (Integer.BYTES + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER); // 3 AtomicInteger
total += RamUsageEstimator.NUM_BYTES_OBJECT_REF; // field: cur
total += RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // field: levelToNodes
if (levelToNodes != null) {
total +=
(long) (numLevels() - 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // no cost for level 0
total +=
(long) nonZeroLevelSize.get()
* (RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ Integer.BYTES);
}
return total;
return graphRamBytesUsed;
}

@Override
Expand Down