From 4c98a40e7eab4831a1c05796b19aa7607fbea165 Mon Sep 17 00:00:00 2001 From: Binlong Gao Date: Thu, 8 Jan 2026 18:07:50 +0800 Subject: [PATCH 1/2] Introduce AllGroupsCollectorManager Signed-off-by: Binlong Gao --- lucene/CHANGES.txt | 2 + .../grouping/AllGroupsCollectorManager.java | 87 +++++++++++++++++++ .../grouping/TestAllGroupsCollector.java | 29 ++++--- 3 files changed, 106 insertions(+), 12 deletions(-) create mode 100644 lucene/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollectorManager.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1f11d0a4a48a..e89097c006f7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -83,6 +83,8 @@ Improvements * GITHUB#15453: Avoid unnecessary sorting and instantiations in readMapOfStrings. (Benjamin Lerer) +* GITHUB: Introduce AllGroupsCollectorManager to parallelize search when using AllGroupsCollector. (Binlong Gao) + Optimizations --------------------- * GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina) diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollectorManager.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollectorManager.java new file mode 100644 index 000000000000..5c46d8d3eb18 --- /dev/null +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollectorManager.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.grouping; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.CollectorManager; + +/** + * A CollectorManager implementation for AllGroupsCollector. + * + * @lucene.experimental + */ +public class AllGroupsCollectorManager + implements CollectorManager, Collection> { + + private final String groupField; + private final ValueSource valueSource; + private final Map valueSourceContext; + + /** Creates a new AllGroupsCollectorManager for TermGroupSelector. */ + public AllGroupsCollectorManager(String groupField) { + this.groupField = groupField; + this.valueSource = null; + this.valueSourceContext = null; + } + + /** Creates a new AllGroupsCollectorManager for ValueSourceGroupSelector. */ + public AllGroupsCollectorManager( + ValueSource valueSource, Map valueSourceContext) { + this.groupField = null; + this.valueSource = valueSource; + this.valueSourceContext = valueSourceContext; + } + + @Override + public AllGroupsCollector newCollector() { + GroupSelector newGroupSelector; + if (groupField != null) { + newGroupSelector = new TermGroupSelector(groupField); + } else { + newGroupSelector = new ValueSourceGroupSelector(valueSource, valueSourceContext); + } + + return new AllGroupsCollector<>(newGroupSelector); + } + + @Override + public Collection reduce(Collection> collectors) { + if (collectors.isEmpty()) { + return Collections.emptyList(); + } + + if (collectors.size() == 1) { + return collectors.iterator().next().getGroups(); + } + + // Merge groups from all collectors + Set allGroups = new HashSet<>(); + for (AllGroupsCollector collector : collectors) { + Collection groups = collector.getGroups(); + if (groups != null) { + allGroups.addAll(groups); + } + } + + return allGroups; + } +} diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestAllGroupsCollector.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestAllGroupsCollector.java index 220c4ed761ab..f3f3ebee9dc5 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestAllGroupsCollector.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestAllGroupsCollector.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.search.grouping; +import java.util.Collection; import java.util.HashMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -100,17 +101,21 @@ public void testTotalGroupCount() throws Exception { IndexSearcher indexSearcher = newSearcher(w.getReader()); w.close(); - AllGroupsCollector allGroupsCollector = createRandomCollector(groupField); - indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupsCollector); - assertEquals(4, allGroupsCollector.getGroupCount()); + AllGroupsCollectorManager allGroupsCollectorManager = createRandomCollectorManager(groupField); + Collection groups = + indexSearcher.search( + new TermQuery(new Term("content", "random")), allGroupsCollectorManager); + assertEquals(4, groups.size()); - allGroupsCollector = createRandomCollector(groupField); - indexSearcher.search(new TermQuery(new Term("content", "some")), allGroupsCollector); - assertEquals(3, allGroupsCollector.getGroupCount()); + allGroupsCollectorManager = createRandomCollectorManager(groupField); + groups = + indexSearcher.search(new TermQuery(new Term("content", "some")), allGroupsCollectorManager); + assertEquals(3, groups.size()); - allGroupsCollector = createRandomCollector(groupField); - indexSearcher.search(new TermQuery(new Term("content", "blob")), allGroupsCollector); - assertEquals(2, allGroupsCollector.getGroupCount()); + allGroupsCollectorManager = createRandomCollectorManager(groupField); + groups = + indexSearcher.search(new TermQuery(new Term("content", "blob")), allGroupsCollectorManager); + assertEquals(2, groups.size()); indexSearcher.getIndexReader().close(); dir.close(); @@ -121,12 +126,12 @@ private void addGroupField(Document doc, String groupField, String value) { doc.add(new SortedDocValuesField(groupField, new BytesRef(value))); } - private AllGroupsCollector createRandomCollector(String groupField) { + private AllGroupsCollectorManager createRandomCollectorManager(String groupField) { if (random().nextBoolean()) { - return new AllGroupsCollector<>(new TermGroupSelector(groupField)); + return new AllGroupsCollectorManager(groupField); } else { ValueSource vs = new BytesRefFieldSource(groupField); - return new AllGroupsCollector<>(new ValueSourceGroupSelector(vs, new HashMap<>())); + return new AllGroupsCollectorManager(vs, new HashMap<>()); } } } From 78518b2372ce7afab004d49a68e70233f93ef694 Mon Sep 17 00:00:00 2001 From: Binlong Gao Date: Fri, 9 Jan 2026 10:40:55 +0800 Subject: [PATCH 2/2] Modify change log Signed-off-by: Binlong Gao --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e89097c006f7..46fec271e749 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -83,7 +83,7 @@ Improvements * GITHUB#15453: Avoid unnecessary sorting and instantiations in readMapOfStrings. (Benjamin Lerer) -* GITHUB: Introduce AllGroupsCollectorManager to parallelize search when using AllGroupsCollector. (Binlong Gao) +* GITHUB#15557: Introduce AllGroupsCollectorManager to parallelize search when using AllGroupsCollector. (Binlong Gao) Optimizations ---------------------