diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 739943b899bd..069d8b4b0cea 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -58,6 +58,8 @@ API Changes * GITHUB#15295 : Switched to a fixed CFS threshold (Shubham Sharma) +* GITHUB#15627 : Deferred lambda in TermStates.java according to prefetch (Shubham Sharma) + New Features --------------------- diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/SegmentTermsEnumFrame.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/SegmentTermsEnumFrame.java index b7affa5c6f5e..126b510d2728 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/SegmentTermsEnumFrame.java @@ -133,10 +133,10 @@ void loadNextFloorBlock() throws IOException { loadBlock(); } - void prefetchBlock() throws IOException { + boolean prefetchBlock() throws IOException { if (nextEnt != -1) { // Already loaded - return; + return false; } // Clone the IndexInput lazily, so that consumers @@ -145,7 +145,7 @@ void prefetchBlock() throws IOException { ste.initIndexInput(); // TODO: Could we know the number of bytes to prefetch? - ste.in.prefetch(fp, 1); + return ste.in.prefetch(fp, 1); } /* Does initial decode of next block of terms; this diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnum.java index c0c0991dfce7..7b99252b7eb8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnum.java @@ -476,26 +476,37 @@ private IOBooleanSupplier prepareSeekExact(BytesRef target, boolean prefetch) th private IOBooleanSupplier getIOBooleanSupplier(BytesRef target, boolean prefetch) throws IOException { + boolean doDefer; if (prefetch) { - currentFrame.prefetchBlock(); + doDefer = currentFrame.prefetchBlock(); + } else { + doDefer = false; } - return () -> { - currentFrame.loadBlock(); + return new IOBooleanSupplier() { + @Override + public boolean get() throws IOException { + currentFrame.loadBlock(); - final SeekStatus result = currentFrame.scanToTerm(target, true); - if (result == SeekStatus.FOUND) { - // if (DEBUG) { - // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term); - // } - return true; - } else { - // if (DEBUG) { - // System.out.println(" got result " + result + "; return NOT_FOUND term=" + - // term.utf8ToString()); - // } + final SeekStatus result = currentFrame.scanToTerm(target, true); + if (result == SeekStatus.FOUND) { + // if (DEBUG) { + // System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term); + // } + return true; + } else { + // if (DEBUG) { + // System.out.println(" got result " + result + "; return NOT_FOUND term=" + + // term.utf8ToString()); + // } + + return false; + } + } - return false; + @Override + public boolean doDefer() { + return doDefer; } }; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnumFrame.java index b55d93968863..96b77c7a08cb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnumFrame.java @@ -135,10 +135,10 @@ void loadNextFloorBlock() throws IOException { loadBlock(); } - void prefetchBlock() throws IOException { + boolean prefetchBlock() throws IOException { if (nextEnt != -1) { // Already loaded - return; + return false; } // Clone the IndexInput lazily, so that consumers @@ -147,7 +147,7 @@ void prefetchBlock() throws IOException { ste.initIndexInput(); // TODO: Could we know the number of bytes to prefetch? - ste.in.prefetch(fp, 1); + return ste.in.prefetch(fp, 1); } /* Does initial decode of next block of terms; this diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java index 1965f49fab88..efbea3e22c7d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java @@ -345,8 +345,9 @@ public void close() throws IOException { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { // Not delegating to the wrapped instance on purpose. This is only used for merging. + return false; } }; } diff --git a/lucene/core/src/java/org/apache/lucene/index/TermStates.java b/lucene/core/src/java/org/apache/lucene/index/TermStates.java index 511a4433f20d..0318cb01aaf6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermStates.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermStates.java @@ -193,28 +193,28 @@ public IOSupplier get(LeafReaderContext ctx) throws IOException { this.states[ctx.ord] = EMPTY_TERMSTATE; return null; } - return () -> { - if (this.states[ctx.ord] == null) { - TermState state = null; - if (termExistsSupplier.get()) { - state = termsEnum.termState(); - this.states[ctx.ord] = state; - } else { - this.states[ctx.ord] = EMPTY_TERMSTATE; - } - } - TermState state = this.states[ctx.ord]; - if (state == EMPTY_TERMSTATE) { - return null; - } - return state; - }; - } - TermState state = this.states[ctx.ord]; - if (state == EMPTY_TERMSTATE) { - return null; + IOSupplier stateSupplier = + () -> { + if (this.states[ctx.ord] == null) { + if (termExistsSupplier.get()) { + this.states[ctx.ord] = termsEnum.termState(); + } else { + this.states[ctx.ord] = EMPTY_TERMSTATE; + } + } + TermState termState = this.states[ctx.ord]; + return termState == EMPTY_TERMSTATE ? null : termState; + }; + if (termExistsSupplier.doDefer()) { + return stateSupplier; + } else { + stateSupplier.get(); + TermState termState = this.states[ctx.ord]; + return termState == EMPTY_TERMSTATE ? null : () -> termState; + } } - return () -> state; + TermState termState = this.states[ctx.ord]; + return termState == EMPTY_TERMSTATE ? null : () -> termState; } /** diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java index 0a6911d116e7..ce0ca2ee0e80 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java @@ -202,8 +202,8 @@ public long readLong(long pos) throws IOException { } @Override - public void prefetch(long offset, long length) throws IOException { - slice.prefetch(offset, length); + public boolean prefetch(long offset, long length) throws IOException { + return slice.prefetch(offset, length); } @Override @@ -223,8 +223,12 @@ public String toString() { * * @param offset start offset * @param length the number of bytes to prefetch + * @return true if prefetch actually prefetched something, hence user can benefit from deferring + * reading the prefetched memory block. */ - public void prefetch(long offset, long length) throws IOException {} + public boolean prefetch(long offset, long length) throws IOException { + return false; + } /** * Optional method: Updates the {@code IOContext} to specify a new read access pattern. IndexInput diff --git a/lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java index 2debe19fdde7..6ce3a99a12d6 100644 --- a/lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -32,7 +32,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.Constants; -import org.apache.lucene.util.IOConsumer; +import org.apache.lucene.util.IOFunction; /** * Base IndexInput implementation that uses an array of MemorySegments to represent a file. @@ -328,9 +328,9 @@ public void seek(long pos) throws IOException { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { if (NATIVE_ACCESS.isEmpty()) { - return; + return false; } ensureOpen(); @@ -340,11 +340,11 @@ public void prefetch(long offset, long length) throws IOException { // power of two. There is a good chance that a good chunk of this index input is cached in // physical memory. Let's skip the overhead of the madvise system call, we'll be trying again // on the next power of two of the counter. - return; + return false; } final NativeAccess nativeAccess = NATIVE_ACCESS.get(); - advise( + return advise( offset, length, segment -> { @@ -352,7 +352,9 @@ public void prefetch(long offset, long length) throws IOException { // We have a cache miss on at least one page, let's reset the counter. sharedPrefetchCounter.set(0); nativeAccess.madviseWillNeed(segment); + return true; } + return false; }); } @@ -369,14 +371,21 @@ private void updateReadAdvice(ReadAdvice readAdvice) throws IOException { long offset = 0; for (MemorySegment seg : segments) { - advise(offset, seg.byteSize(), segment -> nativeAccess.madvise(segment, readAdvice)); + advise( + offset, + seg.byteSize(), + segment -> { + nativeAccess.madvise(segment, readAdvice); + return true; + }); offset += seg.byteSize(); } } - void advise(long offset, long length, IOConsumer advice) throws IOException { + boolean advise(long offset, long length, IOFunction advice) + throws IOException { if (NATIVE_ACCESS.isEmpty()) { - return; + return false; } ensureOpen(); @@ -404,12 +413,12 @@ void advise(long offset, long length, IOConsumer advice) throws I length -= nativeAccess.getPageSize(); if (length <= 0) { // This segment has no data beyond the first page. - return; + return false; } } final MemorySegment advisedSlice = segment.asSlice(offset, length); - advice.accept(advisedSlice); + return advice.apply(advisedSlice); } catch (IndexOutOfBoundsException _) { throw new EOFException("Read past EOF: " + this); } catch (NullPointerException | IllegalStateException e) { @@ -615,6 +624,7 @@ public final MemorySegmentIndexInput slice( slice.length, segment -> { nativeAccess.madvise(segment, advice); + return true; }); } } @@ -804,9 +814,9 @@ public MemorySegment segmentSliceOrNull(long pos, long len) throws IOException { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { Objects.checkFromIndexSize(offset, length, this.length); - super.prefetch(offset, length); + return super.prefetch(offset, length); } } @@ -904,9 +914,9 @@ MemorySegmentIndexInput buildSlice(String sliceDescription, long ofs, long lengt } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { Objects.checkFromIndexSize(offset, length, this.length); - super.prefetch(this.offset + offset, length); + return super.prefetch(this.offset + offset, length); } } } diff --git a/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java b/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java index 44127d90a980..28d2005b4131 100644 --- a/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java @@ -77,7 +77,9 @@ default void readBytes(long pos, byte[] bytes, int offset, int length) throws IO * * @see IndexInput#prefetch */ - default void prefetch(long offset, long length) throws IOException {} + default boolean prefetch(long offset, long length) throws IOException { + return false; + } /** * Returns a hint whether all the contents of this input are resident in physical memory. diff --git a/lucene/core/src/java/org/apache/lucene/util/IOBooleanSupplier.java b/lucene/core/src/java/org/apache/lucene/util/IOBooleanSupplier.java index 4100c6c53c53..fd5d71d73077 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IOBooleanSupplier.java +++ b/lucene/core/src/java/org/apache/lucene/util/IOBooleanSupplier.java @@ -34,4 +34,13 @@ public interface IOBooleanSupplier { * @throws IOException if supplying the result throws an {@link IOException} */ boolean get() throws IOException; + + /** + * Returns whether the TermState get should be deferred. + * + * @return {@code true} if the TermState get should be deferred, {@code false} otherwise + */ + default boolean doDefer() { + return false; + } } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormat.java index ae9c6d7b53f6..67ad3d54dd4f 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormat.java @@ -66,9 +66,10 @@ public CountingPrefetchIndexInput(IndexInput input, AtomicInteger counter) { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { in.prefetch(offset, length); counter.incrementAndGet(); + return true; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90TermVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90TermVectorsFormat.java index 8cc6dda1c1d7..40ca3ab41d15 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90TermVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90TermVectorsFormat.java @@ -68,9 +68,10 @@ public CountingPrefetchIndexInput(IndexInput input, AtomicInteger counter) { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { in.prefetch(offset, length); counter.incrementAndGet(); + return true; } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java index fcf29fcb365e..9e54cb83ac6e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java @@ -171,10 +171,10 @@ public void seek(long pos) throws IOException { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { ensureOpen(); ensureAccessible(); - in.prefetch(offset, length); + return in.prefetch(offset, length); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java index 778f7e686590..f087e7fd20fb 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java @@ -126,7 +126,7 @@ private void onRead(long offset, int len) { } @Override - public void prefetch(long offset, long length) throws IOException { + public boolean prefetch(long offset, long length) throws IOException { final long firstPage = (sliceOffset + offset) >> PAGE_SHIFT; final long lastPage = (sliceOffset + offset + length - 1) >> PAGE_SHIFT; @@ -152,6 +152,8 @@ public void prefetch(long offset, long length) throws IOException { for (long page = firstPage; page <= lastPage; ++page) { pendingPages.add(page); } + + return true; } @Override