-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HADOOP-18291. S3A prefetch - Implement thread-safe LRU cache for SingleFilePerBlockCache #5754
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
282ddd1
4112f9c
96b483c
108cb96
ab42829
87ea604
2c4e0cf
c72a556
2006290
0060914
fe06113
8caaec9
709163c
796a3d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,6 +47,7 @@ | |
|
|
||
| import org.apache.hadoop.conf.Configuration; | ||
| import org.apache.hadoop.fs.LocalDirAllocator; | ||
| import org.apache.hadoop.util.Preconditions; | ||
|
|
||
| import static java.util.Objects.requireNonNull; | ||
| import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull; | ||
|
|
@@ -61,7 +62,27 @@ public class SingleFilePerBlockCache implements BlockCache { | |
| /** | ||
| * Blocks stored in this cache. | ||
| */ | ||
| private final Map<Integer, Entry> blocks = new ConcurrentHashMap<>(); | ||
| private final Map<Integer, Entry> blocks; | ||
|
|
||
| /** | ||
| * Total max blocks count, to be considered as baseline for LRU cache. | ||
| */ | ||
| private final int maxBlocksCount; | ||
|
|
||
| /** | ||
| * The lock to be shared by LRU based linked list updates. | ||
| */ | ||
| private final ReentrantReadWriteLock blocksLock; | ||
|
|
||
| /** | ||
| * Head of the linked list. | ||
| */ | ||
| private Entry head; | ||
|
|
||
| /** | ||
| * Tail of the linked list. | ||
| */ | ||
| private Entry tail; | ||
|
|
||
| /** | ||
| * Number of times a block was read from this cache. | ||
|
|
@@ -89,6 +110,16 @@ public class SingleFilePerBlockCache implements BlockCache { | |
| private static final Set<PosixFilePermission> TEMP_FILE_ATTRS = | ||
| ImmutableSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE); | ||
|
|
||
| /** | ||
| * Prefetch max blocks count config. | ||
| */ | ||
| public static final String FS_PREFETCH_MAX_BLOCKS_COUNT = "fs.prefetch.max.blocks.count"; | ||
|
||
|
|
||
| /** | ||
| * Default value for max blocks count config. | ||
| */ | ||
| private static final int DEFAULT_FS_PREFETCH_MAX_BLOCKS_COUNT = 20; | ||
|
|
||
| /** | ||
| * Cache entry. | ||
| * Each block is stored as a separate file. | ||
|
|
@@ -103,13 +134,17 @@ private enum LockType { | |
| READ, | ||
| WRITE | ||
| } | ||
| private Entry previous; | ||
steveloughran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| private Entry next; | ||
|
|
||
| Entry(int blockNumber, Path path, int size, long checksum) { | ||
| this.blockNumber = blockNumber; | ||
| this.path = path; | ||
| this.size = size; | ||
| this.checksum = checksum; | ||
| this.lock = new ReentrantReadWriteLock(); | ||
| this.previous = null; | ||
| this.next = null; | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -166,16 +201,39 @@ private boolean takeLock(LockType lockType, long timeout, TimeUnit unit) { | |
| } | ||
| return false; | ||
| } | ||
|
|
||
| private Entry getPrevious() { | ||
| return previous; | ||
| } | ||
|
|
||
| private void setPrevious(Entry previous) { | ||
| this.previous = previous; | ||
| } | ||
|
|
||
| private Entry getNext() { | ||
| return next; | ||
| } | ||
|
|
||
| private void setNext(Entry next) { | ||
| this.next = next; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Constructs an instance of a {@code SingleFilePerBlockCache}. | ||
| * | ||
| * @param prefetchingStatistics statistics for this stream. | ||
| * @param conf the configuration object. | ||
| */ | ||
| public SingleFilePerBlockCache(PrefetchingStatistics prefetchingStatistics) { | ||
| public SingleFilePerBlockCache(PrefetchingStatistics prefetchingStatistics, Configuration conf) { | ||
| this.prefetchingStatistics = requireNonNull(prefetchingStatistics); | ||
| this.closed = new AtomicBoolean(false); | ||
| this.maxBlocksCount = | ||
| conf.getInt(FS_PREFETCH_MAX_BLOCKS_COUNT, DEFAULT_FS_PREFETCH_MAX_BLOCKS_COUNT); | ||
| Preconditions.checkArgument(this.maxBlocksCount > 0, | ||
| "prefetch blocks total capacity should be more than 0"); | ||
|
||
| blocks = new ConcurrentHashMap<>(); | ||
| blocksLock = new ReentrantReadWriteLock(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -247,9 +305,46 @@ private Entry getEntry(int blockNumber) { | |
| throw new IllegalStateException(String.format("block %d not found in cache", blockNumber)); | ||
| } | ||
| numGets++; | ||
| addToHeadOfLinkedList(entry); | ||
| return entry; | ||
| } | ||
|
|
||
| /** | ||
| * Add the given entry to the head of the linked list. | ||
| * | ||
| * @param entry Block entry to add. | ||
| */ | ||
| private void addToHeadOfLinkedList(Entry entry) { | ||
| blocksLock.writeLock().lock(); | ||
| try { | ||
steveloughran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (head == null) { | ||
| head = entry; | ||
| tail = entry; | ||
| } | ||
| if (entry != head) { | ||
| Entry prev = entry.getPrevious(); | ||
| Entry nxt = entry.getNext(); | ||
| if (prev != null) { | ||
| prev.setNext(nxt); | ||
| } | ||
| if (nxt != null) { | ||
| nxt.setPrevious(prev); | ||
| } | ||
| entry.setPrevious(null); | ||
| entry.setNext(head); | ||
| head.setPrevious(entry); | ||
| head = entry; | ||
| } | ||
| if (tail != null) { | ||
| while (tail.getNext() != null) { | ||
| tail = tail.getNext(); | ||
| } | ||
| } | ||
|
||
| } finally { | ||
| blocksLock.writeLock().unlock(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Puts the given block in this cache. | ||
| * | ||
|
|
@@ -278,6 +373,7 @@ public void put(int blockNumber, ByteBuffer buffer, Configuration conf, | |
| } finally { | ||
| entry.releaseLock(Entry.LockType.READ); | ||
| } | ||
| addToHeadOfLinkedList(entry); | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -299,9 +395,62 @@ public void put(int blockNumber, ByteBuffer buffer, Configuration conf, | |
| // Update stream_read_blocks_in_cache stats only after blocks map is updated with new file | ||
| // entry to avoid any discrepancy related to the value of stream_read_blocks_in_cache. | ||
| // If stream_read_blocks_in_cache is updated before updating the blocks map here, closing of | ||
| // the input stream can lead to the removal of the cache file even before blocks is added with | ||
| // the new cache file, leading to incorrect value of stream_read_blocks_in_cache. | ||
| // the input stream can lead to the removal of the cache file even before blocks is added | ||
| // with the new cache file, leading to incorrect value of stream_read_blocks_in_cache. | ||
| prefetchingStatistics.blockAddedToFileCache(); | ||
| addToLinkedListAndEvictIfRequired(entry); | ||
| } | ||
|
|
||
| /** | ||
| * Add the given entry to the head of the linked list and if the LRU cache size | ||
| * exceeds the max limit, evict tail of the LRU linked list. | ||
| * | ||
| * @param entry Block entry to add. | ||
| */ | ||
| private void addToLinkedListAndEvictIfRequired(Entry entry) { | ||
| addToHeadOfLinkedList(entry); | ||
| blocksLock.writeLock().lock(); | ||
steveloughran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| try { | ||
| if (blocks.size() > maxBlocksCount && !closed.get()) { | ||
| Entry elementToPurge = tail; | ||
| tail = tail.getPrevious(); | ||
| if (tail == null) { | ||
| tail = head; | ||
| } | ||
| tail.setNext(null); | ||
| elementToPurge.setPrevious(null); | ||
| deleteBlockFileAndEvictCache(elementToPurge); | ||
| } | ||
| } finally { | ||
| blocksLock.writeLock().unlock(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Delete cache file as part of the block cache LRU eviction. | ||
| * | ||
| * @param elementToPurge Block entry to evict. | ||
| */ | ||
| private void deleteBlockFileAndEvictCache(Entry elementToPurge) { | ||
| boolean lockAcquired = | ||
| elementToPurge.takeLock(Entry.LockType.WRITE, PREFETCH_WRITE_LOCK_TIMEOUT, | ||
| PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); | ||
| if (!lockAcquired) { | ||
| LOG.error("Cache file {} deletion would not be attempted as write lock could not" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, there can be a scenario where the current cache exceeds its normal capacity? Is 5 seconds enough time? Or are we okay with this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since we are already using 5s elsewhere (PREFETCH_WRITE_LOCK_TIMEOUT), I used it here as well, but I am happy to change it in the future if we encounter a problem with this. Does that sound good?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems like we are okay with things not blowing up if eviction is not successful — are we fine with that? Can this hurt in the long run?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be okay; in fact, we have the same logic for input stream close as well — if eviction or removal of a disk block is unsuccessful, we leave it behind with a fat warning.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If eviction misses it, stream close would be able to clean it up.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, sounds good. |
||
| + " be acquired within {} {}", elementToPurge.path, PREFETCH_WRITE_LOCK_TIMEOUT, | ||
| PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); | ||
| } else { | ||
| try { | ||
| if (Files.deleteIfExists(elementToPurge.path)) { | ||
| prefetchingStatistics.blockRemovedFromFileCache(); | ||
| blocks.remove(elementToPurge.blockNumber); | ||
| } | ||
| } catch (IOException e) { | ||
| LOG.warn("Failed to delete cache file {}", elementToPurge.path, e); | ||
| } finally { | ||
| elementToPurge.releaseLock(Entry.LockType.WRITE); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private static final Set<? extends OpenOption> CREATE_OPTIONS = | ||
|
|
@@ -337,30 +486,36 @@ protected Path getCacheFilePath(final Configuration conf, | |
| public void close() throws IOException { | ||
| if (closed.compareAndSet(false, true)) { | ||
| LOG.debug(getStats()); | ||
| int numFilesDeleted = 0; | ||
| deleteCacheFiles(); | ||
| } | ||
| } | ||
|
|
||
| for (Entry entry : blocks.values()) { | ||
| boolean lockAcquired = entry.takeLock(Entry.LockType.WRITE, PREFETCH_WRITE_LOCK_TIMEOUT, | ||
| /** | ||
| * Delete cache files as part of the close call. | ||
| */ | ||
| private void deleteCacheFiles() { | ||
| int numFilesDeleted = 0; | ||
| for (Entry entry : blocks.values()) { | ||
| boolean lockAcquired = entry.takeLock(Entry.LockType.WRITE, PREFETCH_WRITE_LOCK_TIMEOUT, | ||
| PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); | ||
| if (!lockAcquired) { | ||
| LOG.error("Cache file {} deletion would not be attempted as write lock could not" | ||
| + " be acquired within {} {}", entry.path, PREFETCH_WRITE_LOCK_TIMEOUT, | ||
| PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); | ||
| if (!lockAcquired) { | ||
| LOG.error("Cache file {} deletion would not be attempted as write lock could not" | ||
| + " be acquired within {} {}", entry.path, PREFETCH_WRITE_LOCK_TIMEOUT, | ||
| PREFETCH_WRITE_LOCK_TIMEOUT_UNIT); | ||
| continue; | ||
| } | ||
| try { | ||
| Files.deleteIfExists(entry.path); | ||
| continue; | ||
| } | ||
| try { | ||
| if (Files.deleteIfExists(entry.path)) { | ||
| prefetchingStatistics.blockRemovedFromFileCache(); | ||
| numFilesDeleted++; | ||
| } catch (IOException e) { | ||
| LOG.warn("Failed to delete cache file {}", entry.path, e); | ||
| } finally { | ||
| entry.releaseLock(Entry.LockType.WRITE); | ||
| } | ||
| } catch (IOException e) { | ||
| LOG.warn("Failed to delete cache file {}", entry.path, e); | ||
| } finally { | ||
| entry.releaseLock(Entry.LockType.WRITE); | ||
| } | ||
|
|
||
| LOG.debug("Prefetch cache close: Deleted {} cache files", numFilesDeleted); | ||
| } | ||
| LOG.debug("Prefetch cache close: Deleted {} cache files", numFilesDeleted); | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
typo: "linked"