Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Optimizations
---------------------
* GITHUB#14418: Quick exit on filter query matching no docs when rewriting knn query. (Pan Guixin)

* GITHUB#14439, GITHUB#14560: Efficient Histogram Collection using multi range traversal over PointTrees (Ankit Jain)
* GITHUB#14439, GITHUB#14560, GITHUB#14622: Efficient Histogram Collection using multi range traversal over PointTrees (Ankit Jain)

* GITHUB#14268: PointInSetQuery early exit on non-matching segments. (hanbj)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@
import org.apache.lucene.sandbox.facet.plain.histograms.HistogramCollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.NumericUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
Expand Down Expand Up @@ -122,9 +124,40 @@ public static class BenchmarkParams {
}

/**
 * Benchmark: runs a {@link MatchAllDocsQuery} through a fresh {@link IndexSearcher} over
 * {@code reader} and collects a histogram of field "f" using {@code params.bucketWidth}.
 *
 * @param params benchmark parameters; only {@code bucketWidth} is read here
 * @throws IOException if the search fails
 */
@Benchmark
public void collectHistogram(BenchmarkParams params) throws IOException {
public void matchAllQueryHistogram(BenchmarkParams params) throws IOException {
IndexSearcher searcher = new IndexSearcher(reader);
// NOTE(review): 10000 is presumably the maximum bucket count allowed by the collector —
// confirm against HistogramCollectorManager.
searcher.search(
new MatchAllDocsQuery(), new HistogramCollectorManager("f", params.bucketWidth, 10000));
}

/**
 * Benchmark: collects a histogram of field "f" over the documents matched by a range query.
 *
 * <p>The range start is drawn from a {@link Random} with a fixed seed, so every invocation
 * uses the same bounds. The start lies in {@code [docCount / 4, 3 * docCount / 4)} and the
 * range extends {@code docCount / 10} above it. When {@code params.pointEnabled} is set the
 * range is expressed as a {@link PointRangeQuery} over sortable-bytes encoded longs; otherwise
 * a doc-values based slow range query is used.
 *
 * @param params benchmark parameters; reads {@code docCount}, {@code pointEnabled} and
 *     {@code bucketWidth}
 * @throws IOException if the search fails
 */
@Benchmark
public void pointRangeQueryHistogram(BenchmarkParams params) throws IOException {
  final IndexSearcher indexSearcher = new IndexSearcher(reader);

  // Fixed seed keeps the selected range identical across invocations
  final Random random = new Random(0);
  final int rangeStart = random.nextInt(params.docCount / 4, 3 * params.docCount / 4);
  // Filter for about 1/10 of the available documents
  final int rangeEnd = rangeStart + params.docCount / 10;

  if (!params.pointEnabled) {
    // No points available: fall back to the doc-values range query
    indexSearcher.search(
        NumericDocValuesField.newSlowRangeQuery("f", rangeStart, rangeEnd),
        new HistogramCollectorManager("f", params.bucketWidth));
    return;
  }

  // Encode both bounds as single-dimension sortable long bytes
  final byte[] encodedStart = new byte[Long.BYTES];
  final byte[] encodedEnd = new byte[Long.BYTES];
  NumericUtils.longToSortableBytes(rangeStart, encodedStart, 0);
  NumericUtils.longToSortableBytes(rangeEnd, encodedEnd, 0);

  final PointRangeQuery rangeQuery =
      new PointRangeQuery("f", encodedStart, encodedEnd, 1) {
        @Override
        protected String toString(int dimension, byte[] value) {
          // Decode the packed bytes back to the long they represent
          return Long.toString(NumericUtils.sortableBytesToLong(value, 0));
        }
      };

  // Don't need to increase the default bucket count
  indexSearcher.search(rangeQuery, new HistogramCollectorManager("f", params.bucketWidth));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,11 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue
}
}

// Not possible to have the CELL_OUTSIDE_QUERY, as bucket lower bound is updated
// while finalizing the previous bucket
// CELL_OUTSIDE_QUERY is possible for the first bucket, e.g. with a PointRangeQuery,
// when the cell's max value falls below the collector's lower bound
if (!collector.withinLowerBound(maxPackedValue)) {
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
if (collector.withinRange(minPackedValue) && collector.withinRange(maxPackedValue)) {
return PointValues.Relation.CELL_INSIDE_QUERY;
}
Expand Down