Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_FOOTER_READ_BUFFER_SIZE)
private int footerReadBufferSize;

@BooleanConfigurationValidatorAnnotation(
ConfigurationKey = FS_AZURE_BUFFERED_PREAD_DISABLE,
DefaultValue = DEFAULT_BUFFERED_PREAD_DISABLE)
private boolean isBufferedPReadDisabled;

@BooleanConfigurationValidatorAnnotation(
ConfigurationKey = FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED,
DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED)
Expand Down Expand Up @@ -953,6 +958,14 @@ public int getFooterReadBufferSize() {
return this.footerReadBufferSize;
}

/**
 * Indicates whether buffered positional reads (pread) have been disabled
 * via configuration.
 *
 * @return {@code true} when buffered pread is turned off; {@code false} otherwise.
 */
public boolean isBufferedPReadDisabled() {
  return isBufferedPReadDisabled;
}

/**
 * Returns the configured read buffer size in bytes.
 *
 * @return the read buffer size.
 */
public int getReadBufferSize() {
return this.readBufferSize;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -945,8 +945,9 @@ contentLength, populateAbfsInputStreamContext(
private AbfsInputStreamContext populateAbfsInputStreamContext(
Optional<Configuration> options, ContextEncryptionAdapter contextEncryptionAdapter) {
boolean bufferedPreadDisabled = options
.map(c -> c.getBoolean(FS_AZURE_BUFFERED_PREAD_DISABLE, false))
.orElse(false);
.map(c -> c.getBoolean(FS_AZURE_BUFFERED_PREAD_DISABLE,
getAbfsConfiguration().isBufferedPReadDisabled()))
.orElse(getAbfsConfiguration().isBufferedPReadDisabled());
int footerReadBufferSize = options.map(c -> c.getInt(
AZURE_FOOTER_READ_BUFFER_SIZE, getAbfsConfiguration().getFooterReadBufferSize()))
.orElse(getAbfsConfiguration().getFooterReadBufferSize());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ public final class AbfsHttpConstants {
public static final String STAR = "*";
public static final String COMMA = ",";
public static final String COLON = ":";
public static final String HYPHEN = "-";
public static final String EQUAL = "=";
public static final String QUESTION_MARK = "?";
public static final String AND_MARK = "&";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public final class FileSystemConfigurations {
public static final boolean DEFAULT_READ_SMALL_FILES_COMPLETELY = false;
public static final boolean DEFAULT_OPTIMIZE_FOOTER_READ = true;
public static final int DEFAULT_FOOTER_READ_BUFFER_SIZE = 512 * ONE_KB;
public static final boolean DEFAULT_BUFFERED_PREAD_DISABLE = false;
public static final boolean DEFAULT_ALWAYS_READ_BUFFER_SIZE = false;
public static final int DEFAULT_READ_AHEAD_BLOCK_SIZE = 4 * ONE_MB;
public static final int DEFAULT_READ_AHEAD_RANGE = 64 * ONE_KB; // 64 KB
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ public enum ReadType {
this.readType = readType;
}

/**
* Get the read type as a string.
*
* @return the read type string
*/
@Override
public String toString() {
return readType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,6 @@ private int optimisedRead(final byte[] b, final int off, final int len,
}
} catch (IOException e) {
LOG.debug("Optimized read failed. Defaulting to readOneBlock {}", e);
tracingContext.setReadType(ReadType.NORMAL_READ);
restorePointerState();
return readOneBlock(b, off, len);
} finally {
Expand All @@ -451,7 +450,6 @@ private int optimisedRead(final byte[] b, final int off, final int len,
// bCursor that means the user requested data has not been read.
if (fCursor < contentLength && bCursor > limit) {
restorePointerState();
tracingContext.setReadType(ReadType.NORMAL_READ);
return readOneBlock(b, off, len);
}
return copyToUserBuffer(b, off, len);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public class TracingContext {
//final concatenated ID list set into x-ms-client-request-id header
private String header = EMPTY_STRING;
private String ingressHandler = EMPTY_STRING;
private String position = EMPTY_STRING;
private String position = String.valueOf(0); // position of read/write in remote file
private String metricResults = EMPTY_STRING;
private String metricHeader = EMPTY_STRING;
private ReadType readType = ReadType.UNKNOWN_READ;
Expand All @@ -80,7 +80,7 @@ public class TracingContext {
* will not change this field. In case {@link #primaryRequestId} is non-null,
* this field shall not be set.
*/
private String primaryRequestIdForRetry;
private String primaryRequestIdForRetry = EMPTY_STRING;
private Integer operatedBlobCount = 1; // Only relevant for rename-delete over blob endpoint where it will be explicitly set.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is it changed from null to 1 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it was coming out as null in ClientReqId. Having a null value does not look good and can be prone to NPE if someone uses this value anywhere.
Since this is set only in rename/delete, other ops are prone to NPE.

As to why set to 1, I thought for every operation this has to be 1. I am open to suggestions for a better default value but strongly feel null should be avoided.

Thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But there was a null check before it was added to the header which would avoid the NPE

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we decided to keep the header schema fixed, and publishing this value as null does not look good in the Client Request Id, as it can be exposed to the user.


private static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class);
Expand Down Expand Up @@ -200,8 +200,8 @@ public void setListener(Listener listener) {
* <li>ingressHandler</li>
* <li>position of read/write in the remote file</li>
* <li>operatedBlobCount - number of blobs operated on by this request</li>
* <li>httpOperationHeader - suffix for network library used</li>
* <li>operationSpecificHeader - different operation types can publish info relevant to that operation</li>
* <li>httpOperationHeader - suffix for network library used</li>
* </ul>
* @param httpOperation AbfsHttpOperation instance to set header into
* connection
Expand All @@ -214,7 +214,7 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail
clientRequestId = UUID.randomUUID().toString();
switch (format) {
case ALL_ID_FORMAT:
header = TracingHeaderVersion.V1.getVersion() + COLON
header = TracingHeaderVersion.getCurrentVersion() + COLON
+ clientCorrelationID + COLON
+ clientRequestId + COLON
+ fileSystemID + COLON
Expand All @@ -225,19 +225,19 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail
+ ingressHandler + COLON
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these empty string checks are needed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With empty checks we cannot have a fixed schema. We need a properly defined schema where each position after the split is fixed for all headers, so analysis can be done easily without worrying about the position of the info we need to analyse.

+ position + COLON
+ operatedBlobCount + COLON
+ httpOperation.getTracingContextSuffix() + COLON
+ getOperationSpecificHeader(opType);
+ getOperationSpecificHeader(opType) + COLON
+ httpOperation.getTracingContextSuffix();

metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : EMPTY_STRING;
break;
case TWO_ID_FORMAT:
header = TracingHeaderVersion.V1.getVersion() + COLON
header = TracingHeaderVersion.getCurrentVersion() + COLON
+ clientCorrelationID + COLON + clientRequestId;
metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : EMPTY_STRING;
break;
default:
//case SINGLE_ID_FORMAT
header = TracingHeaderVersion.V1.getVersion() + COLON
header = TracingHeaderVersion.getCurrentVersion() + COLON
+ clientRequestId;
metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : EMPTY_STRING;
}
Expand Down Expand Up @@ -275,17 +275,15 @@ private String getPrimaryRequestIdForHeader(final Boolean isRetry) {
return primaryRequestIdForRetry;
}

/**
 * Appends failure information to the given header.
 *
 * <p>For a connection-timeout failure with a known retry policy, the result is
 * {@code header_failure_policyAbbreviation}; for any other failure it is
 * {@code header_failure}; when there was no previous failure the header is
 * returned unchanged.
 *
 * @param header the header to decorate
 * @param previousFailure the previous failure reason, or null if none
 * @param retryPolicyAbbreviation abbreviation of the retry policy in effect
 * @return the header with failure details appended
 */
private String addFailureReasons(final String header,
    final String previousFailure, String retryPolicyAbbreviation) {
  if (previousFailure == null) {
    return header;
  }
  final boolean appendPolicy =
      CONNECTION_TIMEOUT_ABBREVIATION.equals(previousFailure)
          && retryPolicyAbbreviation != null;
  return appendPolicy
      ? String.format("%s_%s_%s", header, previousFailure, retryPolicyAbbreviation)
      : String.format("%s_%s", header, previousFailure);
}

/**
* Get the retry header string in format retryCount_failureReason_retryPolicyAbbreviation
* retryCount is always there and 0 for first request.
* failureReason is null for first request
* retryPolicyAbbreviation is only present when request fails with ConnectionTimeout
* @param previousFailure Previous failure reason, null if not a retried request
* @param retryPolicyAbbreviation Abbreviation of retry policy used to get retry interval
* @return String representing the retry header
*/
private String getRetryHeader(final String previousFailure, String retryPolicyAbbreviation) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add javadoc to all newly added methods

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taken

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can remove the addFailureReasons method- it has no usage now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taken

String retryHeader = String.format("%d", retryCount);
if (previousFailure == null) {
Expand All @@ -297,6 +295,11 @@ private String getRetryHeader(final String previousFailure, String retryPolicyAb
return String.format("%s_%s", retryHeader, previousFailure);
}

/**
* Get the operation specific header for the current operation type.
* @param opType The operation type for which the header is needed
* @return String representing the operation specific header
*/
private String getOperationSpecificHeader(FSOperationType opType) {
// Similar header can be added for other operations in the future.
switch (opType) {
Expand All @@ -307,6 +310,10 @@ private String getOperationSpecificHeader(FSOperationType opType) {
}
}

/**
* Get the operation specific header for read operations.
* @return String representing the read specific header
*/
private String getReadSpecificHeader() {
// More information on read can be added to this header in the future.
// As underscore separated values.
Expand Down Expand Up @@ -372,14 +379,14 @@ public void setPosition(final String position) {
}
}

/**
 * Records the read type for the current operation and, when a listener is
 * registered, notifies it of the change.
 *
 * @param readType the read type to set, must not be null.
 */
public void setReadType(ReadType readType) {
  this.readType = readType;
  if (listener == null) {
    return;
  }
  listener.updateReadType(readType);
}

/**
 * Returns the read type recorded for the current operation.
 *
 * @return the current read type.
 */
public ReadType getReadType() {
return readType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,55 @@

package org.apache.hadoop.fs.azurebfs.utils;

/**
* Enum representing the version of the tracing header used in Azure Blob File System (ABFS).
* It defines two versions: V0 and V1, with their respective field counts.
* Any changes to the tracing header should introduce a new version so that every
* version has a fixed predefined schema of fields.
*/
public enum TracingHeaderVersion {

/**
* Version 0 of the tracing header, which has no version prefix and contains 8 permanent and a few optional fields.
* This is the initial version of the tracing header.
*/
V0("", 8),
/**
* Version 1 of the tracing header, which includes a version prefix and has 13 permanent fields.
* This version is used for the current tracing header schema.
* Schema: version:clientCorrelationId:clientRequestId:fileSystemId
* :primaryRequestId:streamId:opType:retryHeader:ingressHandler
* :position:operatedBlobCount:operationSpecificHeader:httpOperationHeader
*/
V1("v1", 13);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the next versions would be V1.1/V1.2, should we consider starting with V1.0/V1.1?
And with the version updates- would we update the version field in V1 only or new V1.1 enum?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So every time we add a new header, we need to add a new version ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will have simple version strings like v0, v1, v2 and so on. This will help reduce char count in clientReqId.

With any new changes in the schema of Tracing Header (add/delete/rearrange) we need to bump up version and update the schema and getCurrentVersion method to return the latest version.


private final String version;
private final String versionString;
private final int fieldCount;

TracingHeaderVersion(String version, int fieldCount) {
this.version = version;
TracingHeaderVersion(String versionString, int fieldCount) {
this.versionString = versionString;
this.fieldCount = fieldCount;
}

@Override
public String toString() {
return version;
return versionString;
}

/**
* Returns the latest version of the tracing header. Any changes done to the
* schema of tracing context header should be accompanied by a version bump.
* @return the latest version of the tracing header.
*/
public static TracingHeaderVersion getCurrentVersion() {
return V1;
Copy link
Contributor

@anmolanmol1234 anmolanmol1234 Aug 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs to be updated everytime a new version is introduced, can it be dynamically fetched ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to update it to the latest version every time we do a version upgrade.

}

public int getFieldCount() {
return V1.fieldCount;
return fieldCount;
}

public String getVersion() {
return V1.version;
public String getVersionString() {
return versionString;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new Tracin
Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito.anyString(), Mockito.anyString());
tracingContext.constructHeader(abfsHttpOperation, null, EXPONENTIAL_RETRY_POLICY_ABBREVIATION);
String header = tracingContext.getHeader();
String assertionPrimaryId = header.split(":")[3];
String assertionPrimaryId = header.split(COLON)[3];

tracingContext.setRetryCount(1);
tracingContext.setListener(new TracingHeaderValidator(
Expand All @@ -277,7 +277,7 @@ fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new Tracin

tracingContext.constructHeader(abfsHttpOperation, READ_TIMEOUT_ABBREVIATION, EXPONENTIAL_RETRY_POLICY_ABBREVIATION);
header = tracingContext.getHeader();
String primaryRequestId = header.split(":")[3];
String primaryRequestId = header.split(COLON)[3];

Assertions.assertThat(primaryRequestId)
.describedAs("PrimaryRequestId in a retried request's tracingContext "
Expand Down Expand Up @@ -329,7 +329,7 @@ fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, new Tracin
}

private void checkHeaderForRetryPolicyAbbreviation(String header, String expectedFailureReason, String expectedRetryPolicyAbbreviation) {
String[] headerContents = header.split(":", SPLIT_NO_LIMIT);
String[] headerContents = header.split(COLON, SPLIT_NO_LIMIT);
String previousReqContext = headerContents[7];

if (expectedFailureReason != null) {
Expand Down
Loading