HADOOP-19767: [ABFS] Introduce Abfs Input Policy for detecting read patterns #8153
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -21,6 +21,7 @@
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Options;

 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT;
@@ -215,6 +216,12 @@ public final class ConfigurationKeys {
   public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth";
   public static final String FS_AZURE_ALWAYS_READ_BUFFER_SIZE = "fs.azure.read.alwaysReadBufferSize";
   public static final String FS_AZURE_READ_AHEAD_BLOCK_SIZE = "fs.azure.read.readahead.blocksize";
+  /**
+   * Provides a hint for the read workload pattern.
+   * Possible values are exposed in {@link Options.OpenFileOptions}.
+   */
+  public static final String FS_AZURE_READ_POLICY = "fs.azure.read.policy";
+
   /** Provides a config control to enable or disable ABFS Flush operations -
    * HFlush and HSync. Default is true. **/
   public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush";
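For orientation, here is a minimal sketch of how a client could supply this hint through a Hadoop Configuration. The key string comes from the constant added above; the account URI, path, and class name are placeholders, and the surrounding wiring is ordinary Hadoop API usage rather than part of this patch.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadPolicyConfigSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hint a random-IO workload; the key matches FS_AZURE_READ_POLICY above.
    conf.set("fs.azure.read.policy", "random");
    // Placeholder account URI: any abfs:// filesystem created from this
    // configuration picks up the policy hint.
    try (FileSystem fs = FileSystem.get(
        URI.create("abfs://container@account.dfs.core.windows.net/"), conf)) {
      fs.open(new Path("/data/part-00000.snappy.parquet")).close();
    }
  }
}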
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;

+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;

 /**
@@ -93,7 +94,7 @@
   /** Default buffer sizes and optimization flags. */
   public static final int DEFAULT_WRITE_BUFFER_SIZE = 8 * ONE_MB;  // 8 MB
   public static final int APPENDBLOB_MAX_WRITE_BUFFER_SIZE = 4 * ONE_MB;  // 4 MB
   public static final boolean DEFAULT_AZURE_ENABLE_SMALL_WRITE_OPTIMIZATION = false;
   public static final int DEFAULT_READ_BUFFER_SIZE = 4 * ONE_MB;  // 4 MB
   public static final boolean DEFAULT_READ_SMALL_FILES_COMPLETELY = false;
@@ -108,6 +109,7 @@
   public static final long MAX_AZURE_BLOCK_SIZE = 256 * 1024 * 1024L;  // changing default abfs blocksize to 256MB
   public static final String AZURE_BLOCK_LOCATION_HOST_DEFAULT = "localhost";
   public static final int DEFAULT_AZURE_LIST_MAX_RESULTS = 5000;
+  public static final String DEFAULT_FS_AZURE_READ_POLICY = FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE;

   public static final String SERVER_SIDE_ENCRYPTION_ALGORITHM = "AES256";
@@ -416,7 +418,7 @@
   public static final boolean DEFAULT_FS_AZURE_ENABLE_CREATE_BLOB_IDEMPOTENCY = true;

-  public static final boolean DEFAULT_FS_AZURE_ENABLE_PREFETCH_REQUEST_PRIORITY = true;
+  public static final boolean DEFAULT_FS_AZURE_ENABLE_PREFETCH_REQUEST_PRIORITY = false;

   // The default traffic request priority is 3 (from service side)
   // The lowest priority a request can get is 7
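Since the store-wide default above is the adaptive policy, a per-file override can still be requested through the standard openFile() builder, which is where these policy names are defined. A small sketch follows: FS_OPTION_OPENFILE_READ_POLICY and the builder flow are the standard Hadoop openFile() API, the helper and class names are illustrative, and this excerpt does not show whether the patch consults the per-open option.

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;

public class OpenFilePolicySketch {
  // Open a file while declaring a sequential read pattern for just this stream.
  static FSDataInputStream openSequential(FileSystem fs, Path path) throws Exception {
    return fs.openFile(path)
        .opt(FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)
        .build()
        .get();  // build() returns a CompletableFuture<FSDataInputStream>
  }
}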
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAdaptiveInputStream.java (new file)

@@ -0,0 +1,109 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.azurebfs.services;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.azurebfs.constants.ReadType;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;

import static java.lang.Math.max;

/**
 * Input stream implementation optimized for adaptive read patterns.
 * This is the default implementation, used when the user does not specify an input policy.
 * It switches between sequential and random read optimizations based on the detected read pattern,
 * and keeps the footer read and small file optimizations enabled.
 */
public class AbfsAdaptiveInputStream extends AbfsInputStream {

  public AbfsAdaptiveInputStream(
      final AbfsClient client,
      final FileSystem.Statistics statistics,
      final String path,
      final long contentLength,
      final AbfsInputStreamContext abfsInputStreamContext,
      final String eTag,
      TracingContext tracingContext) {
    super(client, statistics, path, contentLength,
        abfsInputStreamContext, eTag, tracingContext);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  protected int readOneBlock(final byte[] b, final int off, final int len) throws IOException {
    if (len == 0) {
      return 0;
    }
    if (!validate(b, off, len)) {
      return -1;
    }
    // If the buffer is empty, fill it.
    if (bCursor == limit) {
      // If at EOF, return -1.
      if (fCursor >= contentLength) {
        return -1;
      }

      long bytesRead = 0;
      // Reset the buffer to its initial state, i.e. throw away existing data.
      bCursor = 0;
      limit = 0;
      if (buffer == null) {
        LOG.debug("created new buffer size {}", bufferSize);
        buffer = new byte[bufferSize];
      }

      // Reset read type back to normal; it is set again based on the code path taken.
      tracingContext.setReadType(ReadType.NORMAL_READ);
      if (alwaysReadBufferSize) {
        bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
      } else {
        // Enable readahead when reading sequentially.
        if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) {
          LOG.debug("Sequential read with read ahead size of {}", bufferSize);
          bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
        } else {
          /*
           * Disable queuing prefetches when a random read pattern is detected.
           * Instead, read ahead only readAheadRange bytes beyond what the caller asked for.
           */
          tracingContext.setReadType(ReadType.RANDOM_READ);
          int lengthWithReadAhead = Math.min(b.length + readAheadRange, bufferSize);
          LOG.debug("Random read with read ahead size of {}", lengthWithReadAhead);
          bytesRead = readInternal(fCursor, buffer, 0, lengthWithReadAhead, true);
        }
      }
      if (firstRead) {
        firstRead = false;
      }
      if (bytesRead == -1) {
        return -1;
      }

      limit += bytesRead;
      fCursor += bytesRead;
      fCursorAfterLastRead = fCursor;
    }
    return copyToUserBuffer(b, off, len);
  }
}
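To make the branch above concrete: a read is treated as sequential on the first read, when it begins exactly where the previous read ended, or when the caller's buffer spans at least a full block; anything else falls to the random path. The following standalone sketch restates that predicate with illustrative names (ReadPatternProbe and isSequential are not part of the patch).

public final class ReadPatternProbe {

  /**
   * Mirrors the condition in AbfsAdaptiveInputStream#readOneBlock: sequential
   * on the first read (fCursorAfterLastRead == -1), on a contiguous read, or
   * when the caller's buffer is at least one full block.
   */
  static boolean isSequential(long fCursorAfterLastRead, long fCursor,
      int callerBufferLength, int bufferSize) {
    return fCursorAfterLastRead == -1
        || fCursorAfterLastRead == fCursor
        || callerBufferLength >= bufferSize;
  }

  public static void main(String[] args) {
    int blockSize = 4 * 1024 * 1024;  // matches DEFAULT_READ_BUFFER_SIZE (4 MB)
    System.out.println(isSequential(-1, 0, 8192, blockSize));           // true: first read
    System.out.println(isSequential(8192, 8192, 8192, blockSize));      // true: contiguous read
    System.out.println(isSequential(8192, 1_048_576, 8192, blockSize)); // false: seek detected, random path
  }
}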
File: hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputPolicy.java (new file)

@@ -0,0 +1,78 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.azurebfs.services;

import java.util.Locale;

import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ORC;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_PARQUET;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE;

/**
 * Enum for ABFS input policies.
 * Each policy maps to a particular implementation of {@link AbfsInputStream}.
 */
public enum AbfsInputPolicy {

  SEQUENTIAL(FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL),
  RANDOM(FS_OPTION_OPENFILE_READ_POLICY_RANDOM),
  ADAPTIVE(FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE);

  private final String policy;

  AbfsInputPolicy(String policy) {
    this.policy = policy;
  }

  @Override
  public String toString() {
    return policy;
  }

  /**
   * Get the enum constant from the string name.
   * @param name policy name as configured by the user
   * @return the corresponding AbfsInputPolicy to be used
   */
  public static AbfsInputPolicy getPolicy(String name) {
    String trimmed = name.trim().toLowerCase(Locale.ENGLISH);
    switch (trimmed) {
    // All of these options currently map to random IO.
    case FS_OPTION_OPENFILE_READ_POLICY_RANDOM:
    case FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR:
    case FS_OPTION_OPENFILE_READ_POLICY_ORC:
    case FS_OPTION_OPENFILE_READ_POLICY_PARQUET:
      return RANDOM;

    // Handle the sequential formats.
    case FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL:
    case FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE:
      return SEQUENTIAL;

    // Everything else, including the ABFS default policy, maps to adaptive.
    case FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE:
    default:
      return ADAPTIVE;
    }
  }
}
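A brief usage sketch of the mapping, assuming the standard OpenFileOptions policy strings ("parquet", "whole-file", "adaptive", and so on); the class name is illustrative.

public class InputPolicySketch {
  public static void main(String[] args) {
    // Resolution is trimmed and case-insensitive; toString() returns the policy string.
    System.out.println(AbfsInputPolicy.getPolicy("parquet"));      // prints "random"
    System.out.println(AbfsInputPolicy.getPolicy("whole-file"));   // prints "sequential"
    System.out.println(AbfsInputPolicy.getPolicy("ORC"));          // case-insensitive: prints "random"
    System.out.println(AbfsInputPolicy.getPolicy("not-a-policy")); // default branch: prints "adaptive"
  }
}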
Review comment: Instead of importing the entire Options class, we can import just the OpenFileOptions class and reference OpenFileOptions directly in the comments below:

import org.apache.hadoop.fs.Options.OpenFileOptions;

Author reply: Taken.