HADOOP-19767: [ABFS] Introduce Abfs Input Policy for detecting read patterns #8153
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -21,6 +21,7 @@
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Options;

 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT;
@@ -215,6 +216,12 @@ public final class ConfigurationKeys {
   public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth";
   public static final String FS_AZURE_ALWAYS_READ_BUFFER_SIZE = "fs.azure.read.alwaysReadBufferSize";
   public static final String FS_AZURE_READ_AHEAD_BLOCK_SIZE = "fs.azure.read.readahead.blocksize";
+  /**
+   * Provides a hint for the read workload pattern.
+   * Possible values are exposed in {@link Options.OpenFileOptions}.
+   */
+  public static final String FS_AZURE_READ_POLICY = "fs.azure.read.policy";
+
   /** Provides a config control to enable or disable ABFS Flush operations -
    * HFlush and HSync. Default is true. **/
   public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush";
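For orientation, here is a minimal sketch of how a client could supply this hint through a Hadoop Configuration. The key string comes from the constant added above; the account URI, path, and class name are placeholders, and the surrounding wiring is ordinary Hadoop API usage rather than part of this patch.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadPolicyConfigSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hint a random-IO workload; the key matches FS_AZURE_READ_POLICY above.
    conf.set("fs.azure.read.policy", "random");
    // Placeholder account URI: any abfs:// filesystem created from this
    // configuration picks up the policy hint.
    try (FileSystem fs = FileSystem.get(
        URI.create("abfs://container@account.dfs.core.windows.net/"), conf)) {
      fs.open(new Path("/data/part-00000.snappy.parquet")).close();
    }
  }
}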
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;

+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;

 /**
@@ -93,7 +94,7 @@
   /** Default buffer sizes and optimization flags. */
   public static final int DEFAULT_WRITE_BUFFER_SIZE = 8 * ONE_MB;  // 8 MB
   public static final int APPENDBLOB_MAX_WRITE_BUFFER_SIZE = 4 * ONE_MB;  // 4 MB
   public static final boolean DEFAULT_AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION = false;
   public static final int DEFAULT_READ_BUFFER_SIZE = 4 * ONE_MB;  // 4 MB
   public static final boolean DEFAULT_READ_SMALL_FILES_COMPLETELY = false;
@@ -108,6 +109,7 @@
   public static final long MAX_AZURE_BLOCK_SIZE = 256 * 1024 * 1024L;  // changing default abfs blocksize to 256MB
   public static final String AZURE_BLOCK_LOCATION_HOST_DEFAULT = "localhost";
   public static final int DEFAULT_AZURE_LIST_MAX_RESULTS = 5000;
+  public static final String DEFAULT_FS_AZURE_READ_POLICY = FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE;

   public static final String SERVER_SIDE_ENCRYPTION_ALGORITHM = "AES256";
@@ -416,7 +418,7 @@
   public static final boolean DEFAULT_FS_AZURE_ENABLE_CREATE_BLOB_IDEMPOTENCY = true;

-  public static final boolean DEFAULT_FS_AZURE_ENABLE_PREFETCH_REQUEST_PRIORITY = true;
+  public static final boolean DEFAULT_FS_AZURE_ENABLE_PREFETCH_REQUEST_PRIORITY = false;

   // The default traffic request priority is 3 (from service side)
   // The lowest priority a request can get is 7
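Since the store-wide default above is the adaptive policy, a per-file override can still be requested through the standard openFile() builder, which is where these policy names are defined. A small sketch follows: FS_OPTION_OPENFILE_READ_POLICY and the builder flow are the standard Hadoop openFile() API, the helper and class names are illustrative, and this excerpt does not show whether the patch consults the per-open option.

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;

public class OpenFilePolicySketch {
  // Open a file while declaring a sequential read pattern for just this stream.
  static FSDataInputStream openSequential(FileSystem fs, Path path) throws Exception {
    return fs.openFile(path)
        .opt(FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)
        .build()
        .get();  // build() returns a CompletableFuture<FSDataInputStream>
  }
}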
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAdaptiveInputStream.java (new file)

@@ -0,0 +1,109 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.azurebfs.services;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.azurebfs.constants.ReadType;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;

import static java.lang.Math.max;

/**
 * Input stream implementation optimized for adaptive read patterns.
 * This is the default implementation, used when the user does not specify an input policy.
 * It switches between sequential and random read optimizations based on the detected read pattern,
 * and keeps the footer read and small file optimizations enabled.
 */
public class AbfsAdaptiveInputStream extends AbfsInputStream {

  public AbfsAdaptiveInputStream(
      final AbfsClient client,
      final FileSystem.Statistics statistics,
      final String path,
      final long contentLength,
      final AbfsInputStreamContext abfsInputStreamContext,
      final String eTag,
      TracingContext tracingContext) {
    super(client, statistics, path, contentLength,
        abfsInputStreamContext, eTag, tracingContext);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  protected int readOneBlock(final byte[] b, final int off, final int len) throws IOException {
    if (len == 0) {
      return 0;
    }
    if (!validate(b, off, len)) {
      return -1;
    }
    // If the buffer is empty, fill it.
    if (bCursor == limit) {
      // If at EOF, return -1.
      if (fCursor >= contentLength) {
        return -1;
      }

      long bytesRead = 0;
      // Reset the buffer to its initial state, i.e. throw away existing data.
      bCursor = 0;
      limit = 0;
      if (buffer == null) {
        LOG.debug("created new buffer size {}", bufferSize);
        buffer = new byte[bufferSize];
      }

      // Reset read type back to normal; it is set again based on the code path taken.
      tracingContext.setReadType(ReadType.NORMAL_READ);
      if (alwaysReadBufferSize) {
        bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
      } else {
        // Enable readahead when reading sequentially.
        if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) {
          LOG.debug("Sequential read with read ahead size of {}", bufferSize);
          bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
        } else {
          /*
           * Disable queuing prefetches when a random read pattern is detected.
           * Instead, read ahead only readAheadRange bytes beyond what the caller asked for.
           */
          tracingContext.setReadType(ReadType.RANDOM_READ);
          int lengthWithReadAhead = Math.min(b.length + readAheadRange, bufferSize);
          LOG.debug("Random read with read ahead size of {}", lengthWithReadAhead);
          bytesRead = readInternal(fCursor, buffer, 0, lengthWithReadAhead, true);
        }
      }
      if (firstRead) {
        firstRead = false;
      }
      if (bytesRead == -1) {
        return -1;
      }

      limit += bytesRead;
      fCursor += bytesRead;
      fCursorAfterLastRead = fCursor;
    }
    return copyToUserBuffer(b, off, len);
  }
}
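To make the branch above concrete: a read is treated as sequential on the first read, when it begins exactly where the previous read ended, or when the caller's buffer spans at least a full block; anything else falls to the random path. The following standalone sketch restates that predicate with illustrative names (ReadPatternProbe and isSequential are not part of the patch).

public final class ReadPatternProbe {

  /**
   * Mirrors the condition in AbfsAdaptiveInputStream#readOneBlock: sequential
   * on the first read (fCursorAfterLastRead == -1), on a contiguous read, or
   * when the caller's buffer is at least one full block.
   */
  static boolean isSequential(long fCursorAfterLastRead, long fCursor,
      int callerBufferLength, int bufferSize) {
    return fCursorAfterLastRead == -1
        || fCursorAfterLastRead == fCursor
        || callerBufferLength >= bufferSize;
  }

  public static void main(String[] args) {
    int blockSize = 4 * 1024 * 1024;  // matches DEFAULT_READ_BUFFER_SIZE (4 MB)
    System.out.println(isSequential(-1, 0, 8192, blockSize));           // true: first read
    System.out.println(isSequential(8192, 8192, 8192, blockSize));      // true: contiguous read
    System.out.println(isSequential(8192, 1_048_576, 8192, blockSize)); // false: seek detected, random path
  }
}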
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputPolicy.java (new file)

@@ -0,0 +1,78 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.azurebfs.services;

import java.util.Locale;

import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ORC;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_PARQUET;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE;

/**
 * Enum for ABFS input policies.
 * Each policy maps to a particular implementation of {@link AbfsInputStream}.
 */
public enum AbfsInputPolicy {

  SEQUENTIAL(FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL),
  RANDOM(FS_OPTION_OPENFILE_READ_POLICY_RANDOM),
  ADAPTIVE(FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE);

  private final String policy;

  AbfsInputPolicy(String policy) {
    this.policy = policy;
  }

  @Override
  public String toString() {
    return policy;
  }

  /**
   * Get the enum constant from the string name.
   * @param name policy name as configured by the user
   * @return the corresponding AbfsInputPolicy to be used
   */
  public static AbfsInputPolicy getPolicy(String name) {
    String trimmed = name.trim().toLowerCase(Locale.ENGLISH);
    switch (trimmed) {
    // All of these options currently map to random IO.
    case FS_OPTION_OPENFILE_READ_POLICY_RANDOM:
    case FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR:
    case FS_OPTION_OPENFILE_READ_POLICY_ORC:
    case FS_OPTION_OPENFILE_READ_POLICY_PARQUET:
      return RANDOM;

    // Handle the sequential formats.
    case FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL:
    case FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE:
      return SEQUENTIAL;

    // Everything else, including the ABFS default policy, maps to adaptive.
    case FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE:
    default:
      return ADAPTIVE;
    }
  }
}
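A brief usage sketch of the mapping, assuming the standard OpenFileOptions policy strings ("parquet", "whole-file", "adaptive", and so on); the class name is illustrative.

public class InputPolicySketch {
  public static void main(String[] args) {
    // Resolution is trimmed and case-insensitive; toString() returns the policy string.
    System.out.println(AbfsInputPolicy.getPolicy("parquet"));      // prints "random"
    System.out.println(AbfsInputPolicy.getPolicy("whole-file"));   // prints "sequential"
    System.out.println(AbfsInputPolicy.getPolicy("ORC"));          // case-insensitive: prints "random"
    System.out.println(AbfsInputPolicy.getPolicy("not-a-policy")); // default branch: prints "adaptive"
  }
}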
Review comment: Instead of importing the entire Options class, we can import just the OpenFileOptions class and reference OpenFileOptions directly in the comments below:

import org.apache.hadoop.fs.Options.OpenFileOptions;

Author reply: Taken.