Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ Improvements

* GITHUB#15225: Improve package documentation for org.apache.lucene.util. (Syed Mohammad Saad)

* GITHUB#15795: Introduce MemoryAccountingBitsetCollectorManager to parallelize search when using MemoryAccountingBitsetCollector. (Binlong Gao)

Optimizations
---------------------
* GITHUB#15681: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,12 @@ public void updateBytes(long bytes) {
public long getBytes() {
return memoryUsage.get();
}

public String getName() {
return name;
}

public long getMemoryLimit() {
return memoryLimit;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public MemoryAccountingBitsetCollector(CollectorMemoryTracker tracker) {
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
length += context.reader().maxDoc();
length = docBase + context.reader().maxDoc();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change works for both single-thread case and concurrency search case, the original code doesn't work for concurrency search case since length always starts from 0 for each collector.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean we're creating a maxDoc-length FixedBitSet for every thread? I wonder if there's a way of being more memory efficient here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've refactored the code to make it memory efficient for concurrent search case, please take a look, thanks!

FixedBitSet newBitSet = FixedBitSet.ensureCapacity(bitSet, length);
if (newBitSet != bitSet) {
tracker.updateBytes(newBitSet.ramBytesUsed() - bitSet.ramBytesUsed());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc.search;

import java.util.Collection;
import org.apache.lucene.misc.CollectorMemoryTracker;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.util.FixedBitSet;

/**
* CollectorManager for MemoryAccountingBitsetCollector that supports concurrent search.
*
* <p>Creates multiple collectors for concurrent execution, merges their bitsets and accumulate
* their memory bytes used
*/
public class MemoryAccountingBitsetCollectorManager
implements CollectorManager<MemoryAccountingBitsetCollector, FixedBitSet> {

private final CollectorMemoryTracker tracker;
private long totalBytesUsed;

public MemoryAccountingBitsetCollectorManager(CollectorMemoryTracker tracker) {
this.tracker = tracker;
}

@Override
public MemoryAccountingBitsetCollector newCollector() {
return new MemoryAccountingBitsetCollector(
new CollectorMemoryTracker(tracker.getName() + "-collector", tracker.getMemoryLimit()));
}

@Override
public FixedBitSet reduce(Collection<MemoryAccountingBitsetCollector> collectors) {
if (collectors.isEmpty()) {
return new FixedBitSet(0);
}

if (collectors.size() == 1) {
MemoryAccountingBitsetCollector collector = collectors.stream().findFirst().get();
this.totalBytesUsed = collector.tracker.getBytes();
return collector.bitSet;
}

int maxLength = 0;
MemoryAccountingBitsetCollector largest = null;
for (MemoryAccountingBitsetCollector collector : collectors) {
int bitSetLength = collector.bitSet.length();
if (largest == null || bitSetLength > maxLength) {
maxLength = bitSetLength;
largest = collector;
}
}

FixedBitSet result = largest.bitSet;
for (MemoryAccountingBitsetCollector collector : collectors) {
if (collector != largest) {
result.or(collector.bitSet);
}
this.totalBytesUsed += collector.tracker.getBytes();
}

return result;
}

public long getBytes() {
return this.totalBytesUsed;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;

public class TestMemoryAccountingBitsetCollector extends LuceneTestCase {

Expand Down Expand Up @@ -61,27 +62,26 @@ public void testMemoryAccountingBitsetCollectorMemoryLimit() {
long perCollectorMemoryLimit = 150;
CollectorMemoryTracker tracker =
new CollectorMemoryTracker("testMemoryTracker", perCollectorMemoryLimit);
MemoryAccountingBitsetCollector bitSetCollector = new MemoryAccountingBitsetCollector(tracker);

IndexSearcher searcher = new IndexSearcher(reader);
MemoryAccountingBitsetCollectorManager bitsetCollectorManager =
new MemoryAccountingBitsetCollectorManager(tracker);
IndexSearcher searcher = newSearcher(reader);
expectThrows(
IllegalStateException.class,
() -> {
searcher.search(MatchAllDocsQuery.INSTANCE, bitSetCollector);
});
() -> searcher.search(MatchAllDocsQuery.INSTANCE, bitsetCollectorManager));
}

public void testCollectedResult() throws Exception {
CollectorMemoryTracker tracker =
new CollectorMemoryTracker("testMemoryTracker", Long.MAX_VALUE);
MemoryAccountingBitsetCollector collector = new MemoryAccountingBitsetCollector(tracker);
MemoryAccountingBitsetCollectorManager bitsetCollectorManager =
new MemoryAccountingBitsetCollectorManager(tracker);

IndexSearcher searcher = new IndexSearcher(reader);
searcher.search(MatchAllDocsQuery.INSTANCE, collector);
IndexSearcher searcher = newSearcher(reader);
FixedBitSet result = searcher.search(MatchAllDocsQuery.INSTANCE, bitsetCollectorManager);

assertEquals(1000, collector.bitSet.cardinality());
assertEquals(1000, result.cardinality());
for (int i = 0; i < 1000; i++) {
assertTrue(collector.bitSet.get(i));
assertTrue(result.get(i));
}
}
}
Loading