Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Optimizations
---------------------
* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)
* GITHUB#14022: Optimize DFS marking of connected components in HNSW by reducing stack depth, improving performance and reducing allocations. (Viswanath Kuchibhotla)
* GITHUB#14609: Optimizes PointRangeQuery by rewriting to MatchAllDocsQuery/FieldExistsQuery/MatchNoDocsQuery if all
  docs in index are contained or excluded. (Elliott Bradshaw)

Bug Fixes
---------------------
Expand Down
172 changes: 118 additions & 54 deletions lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import java.util.function.BiFunction;
import java.util.function.Predicate;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
Expand Down Expand Up @@ -54,6 +57,7 @@ public abstract class PointRangeQuery extends Query {
final int bytesPerDim;
final byte[] lowerPoint;
final byte[] upperPoint;
final ByteArrayComparator comparator;

/**
* Expert: create a multidimensional range query for point values.
Expand Down Expand Up @@ -89,6 +93,8 @@ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, in

this.lowerPoint = lowerPoint;
this.upperPoint = upperPoint;

this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
}

/**
Expand Down Expand Up @@ -125,8 +131,6 @@ public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, fl

return new ConstantScoreWeight(this, boost) {

private final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);

private boolean matches(byte[] packedValue) {
int offset = 0;
for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) {
Expand All @@ -142,30 +146,6 @@ private boolean matches(byte[] packedValue) {
return true;
}

private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {

boolean crosses = false;
int offset = 0;

for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) {

if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0
|| comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}

crosses |=
comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0
|| comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0;
}

if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}

private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
return new IntersectVisitor() {

Expand Down Expand Up @@ -268,33 +248,6 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
};
}

private boolean checkValidPointValues(PointValues values) throws IOException {
if (values == null) {
// No docs in this segment/field indexed any points
return false;
}

if (values.getNumIndexDimensions() != numDims) {
throw new IllegalArgumentException(
"field=\""
+ field
+ "\" was indexed with numIndexDimensions="
+ values.getNumIndexDimensions()
+ " but this query has numDims="
+ numDims);
}
if (bytesPerDim != values.getBytesPerDimension()) {
throw new IllegalArgumentException(
"field=\""
+ field
+ "\" was indexed with bytesPerDim="
+ values.getBytesPerDimension()
+ " but this query has bytesPerDim="
+ bytesPerDim);
}
return true;
}

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
Expand Down Expand Up @@ -402,7 +355,8 @@ public int count(LeafReaderContext context) throws IOException {
// docCount == size : counting according number of points in leaf node, so must be
// single-valued.
if (numDims == 1 && values.getDocCount() == values.size()) {
return (int) pointCount(values.getPointTree(), this::relate, this::matches);
return (int)
pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches);
}
}
return super.count(context);
Expand Down Expand Up @@ -580,4 +534,114 @@ public final String toString(String field) {
* @return human readable value for debugging
*/
protected abstract String toString(int dimension, byte[] value);

@Override
public Query rewrite(IndexSearcher searcher) throws IOException {
IndexReader reader = searcher.getIndexReader();

for (LeafReaderContext leaf : reader.leaves()) {
checkValidPointValues(leaf.reader().getPointValues(field));
}

// fetch the global min/max packed values across all segments
byte[] globalMinPacked = PointValues.getMinPackedValue(reader, getField());
byte[] globalMaxPacked = PointValues.getMaxPackedValue(reader, getField());

if (globalMinPacked == null || globalMaxPacked == null) {
return new MatchNoDocsQuery();
}
Comment thread
jainankitk marked this conversation as resolved.

return switch (relate(globalMinPacked, globalMaxPacked)) {
case CELL_INSIDE_QUERY -> {
if (canRewriteToMatchAllQuery(reader)) {
yield new MatchAllDocsQuery();
} else if (canRewriteToFieldExistsQuery(reader)) {
yield new FieldExistsQuery(field);
} else {
yield super.rewrite(searcher);
}
}
case CELL_OUTSIDE_QUERY -> new MatchNoDocsQuery();
case CELL_CROSSES_QUERY -> super.rewrite(searcher);
};
}

private boolean canRewriteToMatchAllQuery(IndexReader reader) throws IOException {
for (LeafReaderContext context : reader.leaves()) {
LeafReader leaf = context.reader();
PointValues values = leaf.getPointValues(field);

if (values == null || values.getDocCount() != leaf.maxDoc()) {
return false;
}
}

return true;
}

private boolean canRewriteToFieldExistsQuery(IndexReader reader) {
for (LeafReaderContext leaf : reader.leaves()) {
FieldInfo info = leaf.reader().getFieldInfos().fieldInfo(field);

if (info != null
&& info.getDocValuesType() == DocValuesType.NONE
&& !info.hasNorms()
&& info.getVectorDimension() == 0) {
// Can't use a FieldExistsQuery on this segment, so return false
return false;
}
}

return true;
}

private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
int offset = 0;

for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) {

if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0
|| comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}

crosses |=
comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0
|| comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0;
}

if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}

private boolean checkValidPointValues(PointValues values) throws IOException {
if (values == null) {
// No docs in this segment/field indexed any points
return false;
}

if (values.getNumIndexDimensions() != numDims) {
throw new IllegalArgumentException(
"field=\""
+ field
+ "\" was indexed with numIndexDimensions="
+ values.getNumIndexDimensions()
+ " but this query has numDims="
+ numDims);
}
if (bytesPerDim != values.getBytesPerDimension()) {
throw new IllegalArgumentException(
"field=\""
+ field
+ "\" was indexed with bytesPerDim="
+ values.getBytesPerDimension()
+ " but this query has bytesPerDim="
+ bytesPerDim);
}
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1953,7 +1953,11 @@ public void testSkipCachingForRangeQuery() throws IOException {
doc2.add(new StringField("name", "alice", Store.YES));
doc2.add(new LongPoint("age", 20));
doc2.add(new SortedNumericDocValuesField("age", 20));
w.addDocuments(Arrays.asList(doc1, doc2));
Document doc3 = new Document();
doc3.add(new StringField("name", "steve", Store.YES));
doc3.add(new LongPoint("age", 25));
doc3.add(new SortedNumericDocValuesField("age", 25));
w.addDocuments(Arrays.asList(doc1, doc2, doc3));
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCachingPolicy(ALWAYS_CACHE);
Expand All @@ -1964,8 +1968,8 @@ public void testSkipCachingForRangeQuery() throws IOException {
TermQuery subQuery1 = new TermQuery(new Term("name", "tom"));
IndexOrDocValuesQuery subQuery2 =
new IndexOrDocValuesQuery(
LongPoint.newRangeQuery("age", 10, 30),
SortedNumericDocValuesField.newSlowRangeQuery("age", 10, 30));
LongPoint.newRangeQuery("age", 10, 20),
SortedNumericDocValuesField.newSlowRangeQuery("age", 10, 20));
BooleanQuery query = bq.add(subQuery1, Occur.FILTER).add(subQuery2, Occur.FILTER).build();
Set<Query> cacheSet = new HashSet<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ private void verifyBinary(byte[][][] docValues, int[] ids, int numBytesPerDim) t

if (missing.get(id) == false) {
doc.add(new BinaryPoint("value", docValues[ord]));
doc.add(new SortedNumericDocValuesField("value", 1));
if (VERBOSE) {
System.out.println("id=" + id);
for (int dim = 0; dim < numDims; dim++) {
Expand Down Expand Up @@ -2465,6 +2466,66 @@ public void testInversePointRange() throws IOException {
dir.close();
}

  /**
   * Verifies PointRangeQuery#rewrite: a range that contains every indexed point rewrites to
   * MatchAllDocsQuery (all docs have a point) or FieldExistsQuery (some docs lack the field),
   * and a range that excludes every indexed point rewrites to MatchNoDocsQuery.
   */
  public void testRangeQueryOptimizesRewrites() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());

    // Pick a random point and a random range guaranteed to contain it on every dimension.
    int numDims = randomIntValue(1, PointValues.MAX_INDEX_DIMENSIONS);
    int[] point = new int[numDims];
    int[] lower = new int[numDims];
    int[] upper = new int[numDims];

    for (int i = 0; i < numDims; i++) {
      point[i] = randomIntValue(1, 10);
      lower[i] = point[i] - randomIntValue(0, 5);
      upper[i] = point[i] + randomIntValue(0, 5);
    }

    // Should rewrite to match all docs query if fully contained
    w.addDocument(
        List.of(new IntPoint("field", point), new SortedNumericDocValuesField("field", 1)));

    IndexReader reader = w.getReader();
    IndexSearcher searcher = new IndexSearcher(reader);

    Query query = IntPoint.newRangeQuery("field", lower, upper);
    Query rewritten = searcher.rewrite(query);
    assertTrue(
        "Expected MatchAllDocsQuery, but got [" + rewritten.getClass().getName() + "]",
        rewritten instanceof MatchAllDocsQuery);
    assertEquals(1, searcher.count(query));

    // Should rewrite to FieldExistsQuery if fully contained but not all docs have values
    w.addDocument(new Document());
    w.forceMerge(1);

    // Reopen so the searcher sees the merged segment with the field-less doc.
    reader.close();
    reader = w.getReader();
    searcher = new IndexSearcher(reader);
    rewritten = searcher.rewrite(query);
    assertTrue(
        "Expected FieldExistsQuery, but got [" + rewritten.getClass().getName() + "]",
        rewritten instanceof FieldExistsQuery);
    assertEquals(1, searcher.count(query));

    // Should fallback to MatchNoDocsQuery if no docs have values
    // (shift the range strictly below the indexed point on every dimension)
    for (int i = 0; i < numDims; i++) {
      lower[i] = point[i] - 3;
      upper[i] = point[i] - 2;
    }

    query = IntPoint.newRangeQuery("field", lower, upper);
    rewritten = searcher.rewrite(query);
    assertTrue(
        "Expected MatchNoDocsQuery, but got [" + rewritten.getClass().getName() + "]",
        rewritten instanceof MatchNoDocsQuery);
    assertEquals(0, searcher.count(query));

    reader.close();
    w.close();
    dir.close();
  }

public void testRangeQuerySkipsNonMatchingSegments() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
Expand Down