From e2eba214503db812a4f2f7e19f6e1417ba227929 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Thu, 6 Feb 2025 05:47:46 -0800 Subject: [PATCH 01/15] Create Rename Idempotency --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 8 +++ .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- .../azurebfs/constants/ConfigurationKeys.java | 2 + .../constants/FileSystemConfigurations.java | 2 + .../constants/HttpHeaderConfigurations.java | 6 ++ .../fs/azurebfs/services/AbfsDfsClient.java | 69 +++++++++++++++++-- .../azurebfs/ITestAbfsNetworkStatistics.java | 5 ++ .../ITestAzureBlobFileSystemCreate.java | 24 ++++++- .../ITestAzureBlobFileSystemMkDir.java | 63 +++++++++++++++++ .../ITestAzureBlobFileSystemRename.java | 65 +++++++++++++++++ .../hadoop/fs/azurebfs/MockIntercept.java | 27 ++++++++ .../fs/azurebfs/services/TestAbfsClient.java | 24 +++++++ .../services/TestAbfsRenameRetryRecovery.java | 22 ++++-- 13 files changed, 306 insertions(+), 13 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index e0cb36201065d..6a51f8d902038 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -440,6 +440,10 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME) private long maxApacheHttpClientConnectionIdleTime; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, + DefaultValue = DEFAULT_FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID) + private boolean enableClientTransactionId; + private String clientProvidedEncryptionKey; private String clientProvidedEncryptionKeySHA; @@ -1077,6 +1081,10 @@ public long getMaxApacheHttpClientConnectionIdleTime() { return maxApacheHttpClientConnectionIdleTime; } + public boolean getIsClientTransactionIdEnabled() { + return enableClientTransactionId; + } + /** * Enum config to allow user to pick format of x-ms-client-request-id header * @return tracingContextFormat config if valid, else default ALL_ID_FORMAT diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 94a445a703953..a68cfc29cd5b7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1922,7 +1922,7 @@ private long extractContentLength(AbfsHttpOperation op) { long contentLength; String contentLengthHeader = op.getResponseHeader( HttpHeaderConfigurations.CONTENT_LENGTH); - if (!contentLengthHeader.equals(EMPTY_STRING)) { + if (!Strings.isNullOrEmpty(contentLengthHeader)) { contentLength = Long.parseLong(contentLengthHeader); } else { contentLength = 0; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index 9c7ab42f370aa..3742361b48445 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -383,6 +383,8 @@ public static String accountProperty(String property, String account) { public static final String FS_AZURE_BLOB_DIR_RENAME_MAX_THREAD = "fs.azure.blob.dir.rename.max.thread"; /**Maximum number of thread per blob-delete orchestration: {@value}*/ public static final String FS_AZURE_BLOB_DIR_DELETE_MAX_THREAD = "fs.azure.blob.dir.delete.max.thread"; + /**Flag to enable/disable sending client transactional ID during create/rename operations: {@value}*/ + public static final String FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID = "fs.azure.enable.client.transaction.id"; private ConfigurationKeys() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 370787a69cc32..6072fcbb6fa08 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -198,5 +198,7 @@ public final class FileSystemConfigurations { public static final int DEFAULT_FS_AZURE_BLOB_DELETE_THREAD = DEFAULT_FS_AZURE_LISTING_ACTION_THREADS; + public static final boolean DEFAULT_FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID = false; + private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java index e5909d22f48c6..b442b1f853347 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java @@ -132,5 +132,11 @@ public final class HttpHeaderConfigurations { */ public static final String X_MS_COPY_STATUS = "x-ms-copy-status"; + /** + * Http Request Header for create rename idempotence. + * {@value} + */ + public static final String X_MS_CLIENT_TRANSACTION_ID = "x-ms-client-transaction-id"; + private HttpHeaderConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 71b89147017d7..7f2a7fe6633b6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -106,6 +106,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.RANGE; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.USER_AGENT; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_EXISTING_RESOURCE_TYPE; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ACTION; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_BREAK_PERIOD; @@ -378,6 +379,9 @@ public AbfsRestOperation createPath(final String path, requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); } + // Add the client transaction ID to the request headers. + String clientTransactionId = addClientTransactionIdToHeader(requestHeaders); + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, isFile ? FILE : DIRECTORY); if (isAppendBlob) { @@ -400,11 +404,27 @@ public AbfsRestOperation createPath(final String path, if (!op.hasResult()) { throw ex; } - if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { - String existingResource = - op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); - if (existingResource != null && existingResource.equals(DIRECTORY)) { - return op; //don't throw ex on mkdirs for existing directory + if (op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { + if (!isFile) { + String existingResource = + op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); + if (existingResource != null && existingResource.equals(DIRECTORY)) { + return op; //don't throw ex on mkdirs for existing directory + } + } else if (clientTransactionId != null) { + try { + final AbfsHttpOperation getPathStatusOp = + getPathStatus(path, false, + tracingContext, null).getResult(); + if (clientTransactionId.equals( + getPathStatusOp.getResponseHeader( + X_MS_CLIENT_TRANSACTION_ID))) { + return op; + } + } catch (AzureBlobFileSystemException ignored) { + // In case of get path status failure, + // we will throw the original exception. + } } } throw ex; @@ -611,6 +631,9 @@ public AbfsClientRenameResult renamePath( requestHeaders.add(new AbfsHttpHeader(X_MS_RENAME_SOURCE, encodedRenameSource)); requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, STAR)); + // Add the client transaction ID to the request headers. + String clientTransactionId = addClientTransactionIdToHeader(requestHeaders); + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); appendSASTokenToQuery(destination, @@ -635,6 +658,25 @@ public AbfsClientRenameResult renamePath( throw e; } + if (clientTransactionId != null && SOURCE_PATH_NOT_FOUND.getErrorCode() + .equalsIgnoreCase(op.getResult().getStorageErrorCode())) { + try { + final AbfsHttpOperation abfsHttpOperation = + getPathStatus(destination, false, + tracingContext, null).getResult(); + if (clientTransactionId.equals( + abfsHttpOperation.getResponseHeader( + X_MS_CLIENT_TRANSACTION_ID))) { + return new AbfsClientRenameResult(op, true, + isMetadataIncompleteState); + } + } catch (AbfsRestOperationException ignored) { + // In case of get path status failure, + // we will throw the original exception. + } + throw e; + } + // ref: HADOOP-18242. Rename failure occurring due to a rare case of // tracking metadata being in incomplete state. if (op.getResult().getStorageErrorCode() @@ -1511,4 +1553,21 @@ private Hashtable parseCommaSeparatedXmsProperties(String xMsPro return properties; } + + /** + * Add the client transaction id to the request header + * if {@link AbfsConfiguration#getIsClientTransactionIdEnabled()} is enabled. + * @param requestHeaders list of headers to be sent with the request + * + * @return client transaction id + */ + private String addClientTransactionIdToHeader(List requestHeaders) { + String clientTransactionId = null; + if (getAbfsConfiguration().getIsClientTransactionIdEnabled()) { + clientTransactionId = UUID.randomUUID().toString(); + requestHeaders.add( + new AbfsHttpHeader(X_MS_CLIENT_TRANSACTION_ID, clientTransactionId)); + } + return clientTransactionId; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java index 0de2e40ce3d8c..204a137c280ad 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java @@ -185,6 +185,11 @@ public void testAbfsHttpSendStatistics() throws IOException { } else { expectedConnectionsMade += 3; expectedRequestsSent += 2; + if (fs.getAbfsStore().getAbfsConfiguration() + .getIsClientTransactionIdEnabled()) { + // 1 extra connection for the HEAD request to get the source path status + expectedConnectionsMade++; + } } } else { expectedConnectionsMade += 1; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 81b86088c561f..004361e3660ac 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -479,8 +479,16 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) // Only single tryGetFileStatus should happen // 1. getFileStatus on DFS endpoint : 1 // getFileStatus on Blob endpoint: 1 (No Additional List blob call as file exists) - - createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 2: 1); + if (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs)) { + createRequestCount += 2; + } else { + createRequestCount += 1; + if (fs.getAbfsStore().getAbfsConfiguration() + .getIsClientTransactionIdEnabled()) { + // 1 extra connection for the HEAD request to get the source path status + createRequestCount += 1; + } + } assertAbfsStatistics( CONNECTIONS_MADE, @@ -521,7 +529,17 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) // 1. create without overwrite // 2. GetFileStatus to get eTag // 3. create with overwrite - createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 4: 3); + if (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs)) { + createRequestCount += 4; + } else { + createRequestCount += 3; + if (fs.getAbfsStore().getAbfsConfiguration() + .getIsClientTransactionIdEnabled()) { + // 1 extra connection for the HEAD request to get the source path status + createRequestCount += 1; + } + } + } else { createRequestCount++; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index 0c50e279df27a..0786eed174fea 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -18,24 +18,38 @@ package org.apache.hadoop.fs.azurebfs; +import java.util.ArrayList; +import java.util.List; import java.util.UUID; import org.junit.Assume; import org.junit.Test; import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import static java.net.HttpURLConnection.HTTP_CONFLICT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.ArgumentMatchers.nullable; /** * Test mkdir operation. @@ -167,4 +181,53 @@ public void testMkdirWithExistingFilename() throws Exception { intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath"))); intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath/newDir"))); } + + @Test + public void createPathRetryIdempotency() throws Exception { + final AzureBlobFileSystem currentFs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), config); + AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); + fs.getAbfsStore().setClient(abfsClient); + final Path nonOverwriteFile = new Path("/NonOverwriteTest_FileName_" + UUID.randomUUID()); + final List headers = new ArrayList<>(); + TestAbfsClient.mockAbfsOperationCreation(abfsClient, new MockIntercept() { + private int count = 0; + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn("PUT").when(op).getMethod(); + Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_CONFLICT, AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), + "", null, op); + } + } + }); + AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doAnswer(answer -> { + String requiredHeader = null; + for (AbfsHttpHeader httpHeader : headers) { + if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase(httpHeader.getName())) { + requiredHeader = httpHeader.getValue(); + break; + } + } + return requiredHeader; + }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); + Mockito.doReturn(true).when(getPathRestOp).hasResult(); + Mockito.doReturn(op).when(getPathRestOp).getResult(); + Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( + nullable(String.class), nullable(Boolean.class), + nullable(TracingContext.class), nullable(ContextEncryptionAdapter.class)); + + fs.create(nonOverwriteFile, false); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index abd45eae0e087..7867358149677 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -33,6 +33,7 @@ import org.assertj.core.api.Assertions; import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; import org.mockito.Mockito; import org.mockito.stubbing.Answer; @@ -50,12 +51,14 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsLease; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.BlobRenameHandler; import org.apache.hadoop.fs.azurebfs.services.RenameAtomicity; import org.apache.hadoop.fs.azurebfs.services.RenameAtomicityTestUtils; +import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.statistics.IOStatisticAssertions; @@ -70,8 +73,11 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_ABORTED; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_FAILED; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_PENDING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RESOURCE_TYPE; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; @@ -1641,4 +1647,63 @@ public void testRenameSrcDirDeleteEmitDeletionCountInClientRequestId() Mockito.any(TracingContext.class)); fs.rename(new Path(dirPathStr), new Path("/dst/")); } + + @Test + public void renamePathRetryIdempotency() throws Exception { + final AzureBlobFileSystem currentFs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + final AzureBlobFileSystem fs = + (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); + fs.getAbfsStore().setClient(abfsClient); + Path sourceDir = path("/testSrc"); + assertMkdirs(fs, sourceDir); + String filename = "file1"; + Path sourceFilePath = new Path(sourceDir, filename); + touch(sourceFilePath); + Path destFilePath = new Path(sourceDir, "file2"); + final List headers = new ArrayList<>(); + TestAbfsClient.mockAbfsOperationCreation(abfsClient, + new MockIntercept() { + private int count = 0; + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn("PUT").when(op).getMethod(); + Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) + .getStorageErrorCode(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_NOT_FOUND, SOURCE_PATH_NOT_FOUND.getErrorCode(), + "", null, op); + } + } + }); + AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doAnswer(answer -> { + String requiredHeader = null; + for (AbfsHttpHeader httpHeader : headers) { + if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase(httpHeader.getName())) { + requiredHeader = httpHeader.getValue(); + break; + } + } + return requiredHeader; + }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); + Mockito.doReturn(true).when(getPathRestOp).hasResult(); + Mockito.doReturn(op).when(getPathRestOp).getResult(); + Mockito.doReturn(DIRECTORY).when(op).getResponseHeader(X_MS_RESOURCE_TYPE); + Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), Mockito.nullable(ContextEncryptionAdapter.class)); + fs.rename(sourceFilePath, destFilePath); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java new file mode 100644 index 0000000000000..3e1ce2351ff41 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs; + +import org.mockito.invocation.InvocationOnMock; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; + +public interface MockIntercept { + void answer(T mockedObj, InvocationOnMock answer) throws AbfsRestOperationException; +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java index e8ab4291b32c5..6c5c40228d9cd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java @@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; +import org.apache.hadoop.fs.azurebfs.MockIntercept; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.utils.Base64; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; @@ -37,6 +38,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; import static org.apache.hadoop.fs.azurebfs.services.AbfsClient.ABFS_CLIENT_TIMER_THREAD_NAME; +import static org.mockito.ArgumentMatchers.any; /** * Unit test cases for the AbfsClient class. @@ -138,4 +140,26 @@ private boolean isThreadRunning(String threadName) { } return false; } + + public static void mockAbfsOperationCreation(final AbfsClient abfsClient, + final MockIntercept mockIntercept) throws Exception { + Mockito.doAnswer(answer -> { + AbfsRestOperation op = Mockito.spy( + new AbfsRestOperation( + answer.getArgument(0), + abfsClient, + answer.getArgument(1), + answer.getArgument(2), + answer.getArgument(3), + abfsClient.getAbfsConfiguration() + )); + Mockito.doAnswer((answer1) -> { + mockIntercept.answer(op, answer1); + return null; + }).when(op) + .execute(any()); + return op; + }).when(abfsClient) + .getAbfsRestOperation(any(), any(), any(), any()); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index 03a3089923fd5..3b4eefe09f6f5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -272,12 +272,18 @@ public void testRenameRecoveryEtagMatchFsLevel() throws IOException { fs.rename(new Path(path1), new Path(path2)); // validating stat counters after rename - // 4 calls should have happened in total for rename + // 5 calls should have happened in total for rename // 1 -> original rename rest call, 2 -> first retry, // +2 for getPathStatus calls + Long expectedConnectionsMade = 5 + connMadeBeforeRename; + if (fs.getAbfsStore().getAbfsConfiguration() + .getIsClientTransactionIdEnabled()) { + // 1 extra connection for the HEAD request to get the source path status + expectedConnectionsMade++; + } assertThatStatisticCounter(ioStats, CONNECTIONS_MADE.getStatName()) - .isEqualTo(5 + connMadeBeforeRename); + .isEqualTo(expectedConnectionsMade); // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 // retries happen internally within AbfsRestOperation execute() // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called @@ -349,13 +355,21 @@ public void testRenameRecoveryFailsForDirFsLevel() throws Exception { assertEquals(false, renameResult); // validating stat counters after rename - // 3 calls should have happened in total for rename + // 4 calls should have happened in total for rename // 1 -> original rename rest call, 2 -> first retry, // +1 for getPathStatus calls // last getPathStatus call should be skipped + Long expectedConnectionsMade = 4 + connMadeBeforeRename; + if (fs.getAbfsStore().getAbfsConfiguration() + .getIsClientTransactionIdEnabled()) { + // 1 extra connection for the HEAD request to get the source path status + expectedConnectionsMade++; + } assertThatStatisticCounter(ioStats, CONNECTIONS_MADE.getStatName()) - .isEqualTo(4 + connMadeBeforeRename); + .isEqualTo(expectedConnectionsMade); + + // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 // retries happen internally within AbfsRestOperation execute() From 086f2440907ac4afdac54a324d89f5448d48e5d7 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Thu, 6 Feb 2025 09:47:51 -0800 Subject: [PATCH 02/15] Java docs for test cases --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 5 ++ .../ITestAzureBlobFileSystemMkDir.java | 40 ++++++++++---- .../ITestAzureBlobFileSystemRename.java | 53 +++++++++++-------- .../hadoop/fs/azurebfs/MockIntercept.java | 16 ++++++ .../fs/azurebfs/services/TestAbfsClient.java | 9 ++++ 5 files changed, 92 insertions(+), 31 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index a68cfc29cd5b7..522d688f5d6be 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -2207,6 +2207,11 @@ void setClient(AbfsClient client) { this.client = client; } + @VisibleForTesting + void setClientHandler(AbfsClientHandler clientHandler) { + this.clientHandler = clientHandler; + } + @VisibleForTesting DataBlocks.BlockFactory getBlockFactory() { return blockFactory; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index 0786eed174fea..fc1ea6ecb7ef4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -36,6 +36,8 @@ import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; @@ -49,7 +51,6 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.test.LambdaTestUtils.intercept; -import static org.mockito.ArgumentMatchers.nullable; /** * Test mkdir operation. @@ -182,16 +183,35 @@ public void testMkdirWithExistingFilename() throws Exception { intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath/newDir"))); } + /** + * Tests the idempotency of creating a path with retries by simulating + * a conflict response (HTTP 409) from the Azure Blob File System client. + * The method ensures that the path creation operation retries correctly + * with the proper transaction ID headers, verifying idempotency during + * failure recovery. + * + * @throws Exception if any error occurs during the operation. + */ @Test public void createPathRetryIdempotency() throws Exception { final AzureBlobFileSystem currentFs = getFileSystem(); Configuration config = new Configuration(this.getRawConfiguration()); - final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), config); - AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); + config.set("fs.azure.enable.client.transaction.id", "true"); + final AzureBlobFileSystem fs = + (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), + config); + assumeDfsServiceType(); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); + AbfsDfsClient abfsClient = (AbfsDfsClient) Mockito.spy(clientHandler.getClient()); fs.getAbfsStore().setClient(abfsClient); - final Path nonOverwriteFile = new Path("/NonOverwriteTest_FileName_" + UUID.randomUUID()); + fs.getAbfsStore().setClientHandler(clientHandler); + Mockito.doReturn(abfsClient).when(clientHandler).getIngressClient(); + final Path nonOverwriteFile = new Path( + "/NonOverwriteTest_FileName_" + UUID.randomUUID()); final List headers = new ArrayList<>(); - TestAbfsClient.mockAbfsOperationCreation(abfsClient, new MockIntercept() { + TestAbfsClient.mockAbfsOperationCreation(abfsClient, + new MockIntercept() { private int count = 0; @Override public void answer(final AbfsRestOperation mockedObj, @@ -205,8 +225,8 @@ public void answer(final AbfsRestOperation mockedObj, Mockito.doReturn(op).when(mockedObj).getResult(); Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_CONFLICT, AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), - "", null, op); + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), "", null, op); } } }); @@ -225,9 +245,9 @@ public void answer(final AbfsRestOperation mockedObj, Mockito.doReturn(true).when(getPathRestOp).hasResult(); Mockito.doReturn(op).when(getPathRestOp).getResult(); Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( - nullable(String.class), nullable(Boolean.class), - nullable(TracingContext.class), nullable(ContextEncryptionAdapter.class)); - + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); fs.create(nonOverwriteFile, false); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 7867358149677..0b8fa80ea506f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1648,13 +1648,23 @@ public void testRenameSrcDirDeleteEmitDeletionCountInClientRequestId() fs.rename(new Path(dirPathStr), new Path("/dst/")); } + /** + * Test to verify the idempotency of the `rename` operation in Azure Blob File System when retrying + * after a failure. The test simulates a "path not found" error (HTTP 404) on the first attempt, + * checks that the operation correctly retries using the appropriate transaction ID, + * and ensures that the source file is renamed to the destination path once successful. + * + * @throws Exception if an error occurs during the file system operations or mocking + */ @Test public void renamePathRetryIdempotency() throws Exception { final AzureBlobFileSystem currentFs = getFileSystem(); Configuration config = new Configuration(this.getRawConfiguration()); + config.set("fs.azure.enable.client.transaction.id", "true"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), config); + assumeDfsServiceType(); AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); fs.getAbfsStore().setClient(abfsClient); Path sourceDir = path("/testSrc"); @@ -1666,26 +1676,26 @@ public void renamePathRetryIdempotency() throws Exception { final List headers = new ArrayList<>(); TestAbfsClient.mockAbfsOperationCreation(abfsClient, new MockIntercept() { - private int count = 0; - @Override - public void answer(final AbfsRestOperation mockedObj, - final InvocationOnMock answer) throws AbfsRestOperationException { - if (count == 0) { - count = 1; - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn("PUT").when(op).getMethod(); - Mockito.doReturn("").when(op).getStorageErrorMessage(); - Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) - .getStorageErrorCode(); - Mockito.doReturn(true).when(mockedObj).hasResult(); - Mockito.doReturn(op).when(mockedObj).getResult(); - Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); - headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_NOT_FOUND, SOURCE_PATH_NOT_FOUND.getErrorCode(), - "", null, op); - } - } - }); + private int count = 0; + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn("PUT").when(op).getMethod(); + Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) + .getStorageErrorCode(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_NOT_FOUND, + SOURCE_PATH_NOT_FOUND.getErrorCode(), "", null, op); + } + } + }); AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); Mockito.doAnswer(answer -> { @@ -1703,7 +1713,8 @@ public void answer(final AbfsRestOperation mockedObj, Mockito.doReturn(DIRECTORY).when(op).getResponseHeader(X_MS_RESOURCE_TYPE); Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( Mockito.nullable(String.class), Mockito.nullable(Boolean.class), - Mockito.nullable(TracingContext.class), Mockito.nullable(ContextEncryptionAdapter.class)); + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); fs.rename(sourceFilePath, destFilePath); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java index 3e1ce2351ff41..43a4873c6cfd6 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java @@ -22,6 +22,22 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +/** + * Interface used to intercept and customize the behavior of mocked + * `AbfsRestOperation` objects. The implementing class should define + * how to handle the mock operation when it is invoked. + * + * @param the type of the mocked object, typically an `AbfsRestOperation` + */ public interface MockIntercept { + + /** + * Defines custom behavior for handling the mocked object during its execution. + * + * @param mockedObj the mocked `AbfsRestOperation` object + * @param answer the invocation details for the mock method + * @throws AbfsRestOperationException if an error occurs during the + * mock operation handling + */ void answer(T mockedObj, InvocationOnMock answer) throws AbfsRestOperationException; } \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java index 6c5c40228d9cd..c8efa0f3b6878 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java @@ -141,6 +141,15 @@ private boolean isThreadRunning(String threadName) { return false; } + /** + * Mocks the creation of an `AbfsRestOperation` for the given `AbfsClient` and intercepts its execution. + * This method sets up a mock behavior where the `AbfsRestOperation` will call the provided `MockIntercept` + * to handle custom logic during the operation execution. + * + * @param abfsClient the `AbfsClient` to mock the operation for + * @param mockIntercept the mock interceptor that defines custom behavior during the operation execution + * @throws Exception if an error occurs while mocking the operation creation + */ public static void mockAbfsOperationCreation(final AbfsClient abfsClient, final MockIntercept mockIntercept) throws Exception { Mockito.doAnswer(answer -> { From b3d3c535dcb5c614bc06a8c5e33de63dd134082d Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Thu, 13 Feb 2025 01:42:34 -0800 Subject: [PATCH 03/15] xMSVersion update for client transaction id support --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 1 - .../azurebfs/constants/AbfsHttpConstants.java | 3 +- .../fs/azurebfs/services/AbfsClient.java | 5 + .../ITestAzureBlobFileSystemMkDir.java | 144 +++++++++++------ .../ITestAzureBlobFileSystemRename.java | 153 +++++++++++------- .../services/TestAbfsRenameRetryRecovery.java | 1 + 6 files changed, 196 insertions(+), 111 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 522d688f5d6be..aa51b23471371 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -148,7 +148,6 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_STAR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_UNDERSCORE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 3d14ec848df9d..f32566f26483b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -184,7 +184,8 @@ public enum ApiVersion { DEC_12_2019("2019-12-12"), APR_10_2021("2021-04-10"), - AUG_03_2023("2023-08-03"); + AUG_03_2023("2023-08-03"), + NOV_04_2024("2024-11-04"); private final String xMsApiVersion; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 47ae988419c85..094e335f94bf9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -215,6 +215,11 @@ private AbfsClient(final URL baseUrl, encryptionType = EncryptionType.GLOBAL_KEY; } + // Version update needed to support x-ms-client-transaction-id header + if (abfsConfiguration.getIsClientTransactionIdEnabled()) { + xMsVersion = ApiVersion.NOV_04_2024; + } + String sslProviderName = null; if (this.baseUrl.toString().startsWith(HTTPS_SCHEME)) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index fc1ea6ecb7ef4..1ca468fb545c5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.UUID; +import org.assertj.core.api.Assertions; import org.junit.Assume; import org.junit.Test; import org.mockito.Mockito; @@ -46,6 +47,7 @@ import static java.net.HttpURLConnection.HTTP_CONFLICT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; @@ -194,60 +196,96 @@ public void testMkdirWithExistingFilename() throws Exception { */ @Test public void createPathRetryIdempotency() throws Exception { - final AzureBlobFileSystem currentFs = getFileSystem(); Configuration config = new Configuration(this.getRawConfiguration()); - config.set("fs.azure.enable.client.transaction.id", "true"); - final AzureBlobFileSystem fs = - (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), - config); - assumeDfsServiceType(); - AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); - AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); - AbfsDfsClient abfsClient = (AbfsDfsClient) Mockito.spy(clientHandler.getClient()); - fs.getAbfsStore().setClient(abfsClient); - fs.getAbfsStore().setClientHandler(clientHandler); - Mockito.doReturn(abfsClient).when(clientHandler).getIngressClient(); - final Path nonOverwriteFile = new Path( - "/NonOverwriteTest_FileName_" + UUID.randomUUID()); - final List headers = new ArrayList<>(); - TestAbfsClient.mockAbfsOperationCreation(abfsClient, - new MockIntercept() { - private int count = 0; - @Override - public void answer(final AbfsRestOperation mockedObj, - final InvocationOnMock answer) throws AbfsRestOperationException { - if (count == 0) { - count = 1; - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn("PUT").when(op).getMethod(); - Mockito.doReturn("").when(op).getStorageErrorMessage(); - Mockito.doReturn(true).when(mockedObj).hasResult(); - Mockito.doReturn(op).when(mockedObj).getResult(); - Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); - headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_CONFLICT, - AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), "", null, op); - } - } - }); - AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doAnswer(answer -> { - String requiredHeader = null; - for (AbfsHttpHeader httpHeader : headers) { - if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase(httpHeader.getName())) { - requiredHeader = httpHeader.getValue(); - break; + config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (final AzureBlobFileSystem fs = getFileSystem(config)) { + assumeDfsServiceType(); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); + AbfsDfsClient abfsClient = (AbfsDfsClient) Mockito.spy( + clientHandler.getClient()); + fs.getAbfsStore().setClient(abfsClient); + fs.getAbfsStore().setClientHandler(clientHandler); + Mockito.doReturn(abfsClient).when(clientHandler).getIngressClient(); + final Path nonOverwriteFile = new Path( + "/NonOverwriteTest_FileName_" + UUID.randomUUID()); + final List headers = new ArrayList<>(); + TestAbfsClient.mockAbfsOperationCreation(abfsClient, + new MockIntercept() { + private int count = 0; + + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) + throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn("PUT").when(op).getMethod(); + Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), "", + null, op); + } + } + }); + AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doAnswer(answer -> { + String requiredHeader = null; + for (AbfsHttpHeader httpHeader : headers) { + if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase( + httpHeader.getName())) { + requiredHeader = httpHeader.getValue(); + break; + } } - } - return requiredHeader; - }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); - Mockito.doReturn(true).when(getPathRestOp).hasResult(); - Mockito.doReturn(op).when(getPathRestOp).getResult(); - Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( - Mockito.nullable(String.class), Mockito.nullable(Boolean.class), - Mockito.nullable(TracingContext.class), - Mockito.nullable(ContextEncryptionAdapter.class)); - fs.create(nonOverwriteFile, false); + return requiredHeader; + }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); + Mockito.doReturn(true).when(getPathRestOp).hasResult(); + Mockito.doReturn(op).when(getPathRestOp).getResult(); + Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); + fs.create(nonOverwriteFile, false); + } + } + + /** + * Test to verify that the client transaction ID is included in the response header + * during the creation of a new file in Azure Blob Storage. + *

+ * This test ensures that when a new file is created, the Azure Blob FileSystem client + * correctly includes the client transaction ID in the response header for the created file. + * The test uses a configuration where client transaction ID is enabled and verifies + * its presence after the file creation operation. + *

+ * + * @throws Exception if any error occurs during test execution + */ + @Test + public void getClientTransactionIdAfterCreate() throws Exception { + Configuration config = new Configuration(this.getRawConfiguration()); + config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (final AzureBlobFileSystem fs = getFileSystem(config)) { + assumeDfsServiceType(); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); + final Path nonOverwriteFile = new Path( + "/NonOverwriteTest_FileName_" + UUID.randomUUID()); + fs.create(nonOverwriteFile, false); + + final AbfsHttpOperation getPathStatusOp = + abfsDfsClient.getPathStatus(nonOverwriteFile.toUri().getPath(), false, + getTestTracingContext(fs, true), null).getResult(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be set during create") + .isNotNull(); + } } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 0b8fa80ea506f..1ed9c075a2768 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -75,6 +75,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_PENDING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RESOURCE_TYPE; @@ -1658,63 +1659,103 @@ public void testRenameSrcDirDeleteEmitDeletionCountInClientRequestId() */ @Test public void renamePathRetryIdempotency() throws Exception { - final AzureBlobFileSystem currentFs = getFileSystem(); Configuration config = new Configuration(this.getRawConfiguration()); - config.set("fs.azure.enable.client.transaction.id", "true"); - final AzureBlobFileSystem fs = - (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), - config); - assumeDfsServiceType(); - AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); - fs.getAbfsStore().setClient(abfsClient); - Path sourceDir = path("/testSrc"); - assertMkdirs(fs, sourceDir); - String filename = "file1"; - Path sourceFilePath = new Path(sourceDir, filename); - touch(sourceFilePath); - Path destFilePath = new Path(sourceDir, "file2"); - final List headers = new ArrayList<>(); - TestAbfsClient.mockAbfsOperationCreation(abfsClient, - new MockIntercept() { - private int count = 0; - @Override - public void answer(final AbfsRestOperation mockedObj, - final InvocationOnMock answer) throws AbfsRestOperationException { - if (count == 0) { - count = 1; - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn("PUT").when(op).getMethod(); - Mockito.doReturn("").when(op).getStorageErrorMessage(); - Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) - .getStorageErrorCode(); - Mockito.doReturn(true).when(mockedObj).hasResult(); - Mockito.doReturn(op).when(mockedObj).getResult(); - Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); - headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_NOT_FOUND, - SOURCE_PATH_NOT_FOUND.getErrorCode(), "", null, op); - } - } - }); - AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doAnswer(answer -> { - String requiredHeader = null; - for (AbfsHttpHeader httpHeader : headers) { - if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase(httpHeader.getName())) { - requiredHeader = httpHeader.getValue(); - break; + config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (final AzureBlobFileSystem fs = getFileSystem(config)) { + assumeDfsServiceType(); + AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); + fs.getAbfsStore().setClient(abfsClient); + Path sourceDir = path("/testSrc"); + assertMkdirs(fs, sourceDir); + String filename = "file1"; + Path sourceFilePath = new Path(sourceDir, filename); + touch(sourceFilePath); + Path destFilePath = new Path(sourceDir, "file2"); + final List headers = new ArrayList<>(); + TestAbfsClient.mockAbfsOperationCreation(abfsClient, + new MockIntercept() { + private int count = 0; + + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) + throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn("PUT").when(op).getMethod(); + Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) + .getStorageErrorCode(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_NOT_FOUND, + SOURCE_PATH_NOT_FOUND.getErrorCode(), "", null, op); + } + } + }); + AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doAnswer(answer -> { + String requiredHeader = null; + for (AbfsHttpHeader httpHeader : headers) { + if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase( + httpHeader.getName())) { + requiredHeader = httpHeader.getValue(); + break; + } } - } - return requiredHeader; - }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); - Mockito.doReturn(true).when(getPathRestOp).hasResult(); - Mockito.doReturn(op).when(getPathRestOp).getResult(); - Mockito.doReturn(DIRECTORY).when(op).getResponseHeader(X_MS_RESOURCE_TYPE); - Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( - Mockito.nullable(String.class), Mockito.nullable(Boolean.class), - Mockito.nullable(TracingContext.class), - Mockito.nullable(ContextEncryptionAdapter.class)); - fs.rename(sourceFilePath, destFilePath); + return requiredHeader; + }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); + Mockito.doReturn(true).when(getPathRestOp).hasResult(); + Mockito.doReturn(op).when(getPathRestOp).getResult(); + Mockito.doReturn(DIRECTORY) + .when(op) + .getResponseHeader(X_MS_RESOURCE_TYPE); + Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); + fs.rename(sourceFilePath, destFilePath); + } + } + + /** + * Test to verify that the client transaction ID is included in the response header + * after renaming a file in Azure Blob Storage. + *

+ * This test ensures that when a file is renamed, the Azure Blob FileSystem client + * properly includes the client transaction ID in the response header for the renamed file. + * The test uses a configuration where client transaction ID is enabled and verifies + * its presence after performing a rename operation. + *

+ * + * @throws Exception if any error occurs during test execution + */ + @Test + public void getClientTransactionIdAfterRename() throws Exception { + Configuration config = new Configuration(this.getRawConfiguration()); + config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (final AzureBlobFileSystem fs = getFileSystem(config)) { + assumeDfsServiceType(); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); + Path sourceDir = path("/testSrc"); + assertMkdirs(fs, sourceDir); + String filename = "file1"; + Path sourceFilePath = new Path(sourceDir, filename); + touch(sourceFilePath); + Path destFilePath = new Path(sourceDir, "file2"); + fs.rename(sourceFilePath, destFilePath); + + final AbfsHttpOperation getPathStatusOp = + abfsDfsClient.getPathStatus(destFilePath.toUri().getPath(), false, + getTestTracingContext(fs, true), null).getResult(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction id should be present in dest file") + .isNotNull(); + } } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index 3b4eefe09f6f5..12d20a65e347e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -78,6 +78,7 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { public TestAbfsRenameRetryRecovery() throws Exception { isNamespaceEnabled = getConfiguration() .getBoolean(TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + Assume.assumeTrue(getConfiguration().getIsClientTransactionIdEnabled()); } /** From 13d87cf6c49947be01e8d7a1ac1f15ae5324bd07 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Sun, 16 Feb 2025 03:52:04 -0800 Subject: [PATCH 04/15] Test fixes for create rename idempotency --- .../fs/azurebfs/services/AbfsDfsClient.java | 22 ++++++--- .../azurebfs/AbstractAbfsIntegrationTest.java | 32 +++++++++++++ .../azurebfs/ITestAbfsNetworkStatistics.java | 5 -- .../ITestAzureBlobFileSystemCreate.java | 45 +++++------------ .../ITestAzureBlobFileSystemMkDir.java | 48 +++++++++++++++---- .../ITestAzureBlobFileSystemRename.java | 14 +++--- .../fs/azurebfs/services/TestAbfsClient.java | 1 + .../services/TestAbfsRenameRetryRecovery.java | 22 ++------- 8 files changed, 111 insertions(+), 78 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 08e6f6f536263..c4ccc1813dd6a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -406,14 +406,21 @@ public AbfsRestOperation createPath(final String path, if (!op.hasResult()) { throw ex; } - if (op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { - if (!isFile) { + if (!isFile) { + if (op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { String existingResource = op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); if (existingResource != null && existingResource.equals(DIRECTORY)) { return op; //don't throw ex on mkdirs for existing directory } - } else if (clientTransactionId != null) { + } + } else { + // recovery using client transaction id only if it is a retried request. + if (op.isARetriedRequest() && clientTransactionId != null + && (op.getResult().getStatusCode() == + HttpURLConnection.HTTP_CONFLICT + || op.getResult().getStatusCode() == + HttpURLConnection.HTTP_PRECON_FAILED)) { try { final AbfsHttpOperation getPathStatusOp = getPathStatus(path, false, @@ -725,8 +732,10 @@ public AbfsClientRenameResult renamePath( throw e; } - if (clientTransactionId != null && SOURCE_PATH_NOT_FOUND.getErrorCode() - .equalsIgnoreCase(op.getResult().getStorageErrorCode())) { + // recovery using client transaction id only if it is a retried request. + if (op.isARetriedRequest() && clientTransactionId != null + && SOURCE_PATH_NOT_FOUND.getErrorCode().equalsIgnoreCase( + op.getResult().getStorageErrorCode())) { try { final AbfsHttpOperation abfsHttpOperation = getPathStatus(destination, false, @@ -1630,7 +1639,8 @@ private Hashtable parseCommaSeparatedXmsProperties(String xMsPro */ private String addClientTransactionIdToHeader(List requestHeaders) { String clientTransactionId = null; - if (getAbfsConfiguration().getIsClientTransactionIdEnabled()) { + // Set client transaction ID if the namespace and client transaction ID config are enabled. + if (getIsNamespaceEnabled() && getAbfsConfiguration().getIsClientTransactionIdEnabled()) { clientTransactionId = UUID.randomUUID().toString(); requestHeaders.add( new AbfsHttpHeader(X_MS_CLIENT_TRANSACTION_ID, clientTransactionId)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 4003da49e5147..647364c5fdce0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -46,6 +46,7 @@ import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; @@ -733,4 +734,35 @@ protected void checkFuturesForExceptions(List> futures, int exceptionV } assertEquals(exceptionCaught, exceptionVal); } + + /** + * Assumes that recovery through client transaction ID is enabled. + * Namespace is enabled for the given AzureBlobFileSystem. + * Service type is DFS. + * Assumes that the client transaction ID is enabled in the configuration. + * + * @param fs the AzureBlobFileSystem instance to check + * @throws AzureBlobFileSystemException in case of an error + */ + protected void assumeRecoveryThroughClientTransactionID( + AzureBlobFileSystem fs, boolean isCreate) + throws AzureBlobFileSystemException { + // Assumes that recovery through client transaction ID is enabled. + Assume.assumeTrue(getConfiguration().getIsClientTransactionIdEnabled()); + // Assumes that service type is DFS. + assumeDfsServiceType(); + // Assumes that namespace is enabled for the given AzureBlobFileSystem. + Assume.assumeTrue( + fs.getIsNamespaceEnabled(getTestTracingContext(fs, true))); + if (isCreate) { + // Assume that create client is DFS client. + Assume.assumeTrue( + fs.getAbfsStore().getClientHandler().getIngressClient() + instanceof AbfsDfsClient); + // Assume that append blob is not enabled in DFS client. + Assume.assumeTrue( + StringUtils.isEmpty( + fs.getAbfsStore().getAbfsConfiguration().getAppendBlobDirs())); + } + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java index cc58d9a590d65..e66afbcaa7492 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java @@ -185,11 +185,6 @@ public void testAbfsHttpSendStatistics() throws IOException { } else { expectedConnectionsMade += 3; expectedRequestsSent += 2; - if (fs.getAbfsStore().getAbfsConfiguration() - .getIsClientTransactionIdEnabled()) { - // 1 extra connection for the HEAD request to get the source path status - expectedConnectionsMade++; - } } } else { expectedConnectionsMade += 1; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 669e1deb04d00..e8afe36c5c331 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -496,25 +496,10 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) // Only single tryGetFileStatus should happen // 1. getFileStatus on DFS endpoint : 1 // getFileStatus on Blob endpoint: 1 (No Additional List blob call as file exists) - createRequestCount += ( client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 2 : 1); - // One request to server to create path should be issued - // Only single tryGetFileStatus should happen - // 1. getFileStatus on DFS endpoint : 1 - // getFileStatus on Blob endpoint: 1 (No Additional List blob call as file exists) - if (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs)) { - createRequestCount += 2; - } else { - createRequestCount += 1; - if (fs.getAbfsStore().getAbfsConfiguration() - .getIsClientTransactionIdEnabled()) { - // 1 extra connection for the HEAD request to get the source path status - createRequestCount += 1; - } - } assertAbfsStatistics( CONNECTIONS_MADE, @@ -556,26 +541,18 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) ? 1 : 0); - // Second actual create call will hap - if (enableConditionalCreateOverwrite) { - // Three requests will be sent to server to create path, - // 1. create without overwrite - // 2. GetFileStatus to get eTag - // 3. create with overwrite - if (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs)) { - createRequestCount += 4; - } else { - createRequestCount += 3; - if (fs.getAbfsStore().getAbfsConfiguration() - .getIsClientTransactionIdEnabled()) { - // 1 extra connection for the HEAD request to get the source path status - createRequestCount += 1; - } - } + // Second actual create call will hap + if (enableConditionalCreateOverwrite) { + // Three requests will be sent to server to create path, + // 1. create without overwrite + // 2. GetFileStatus to get eTag + // 3. create with overwrite + createRequestCount += 3; + } else { + createRequestCount += (client instanceof AbfsBlobClient + && !getIsNamespaceEnabled(fs)) ? 2 : 1; - } else { - createRequestCount++; - } + } assertAbfsStatistics( CONNECTIONS_MADE, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index 5049eab5b90ba..8913bb61a084f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.permission.FsPermission; import static java.net.HttpURLConnection.HTTP_CONFLICT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; @@ -196,10 +197,10 @@ public void testMkdirWithExistingFilename() throws Exception { */ @Test public void createPathRetryIdempotency() throws Exception { - Configuration config = new Configuration(this.getRawConfiguration()); - config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); - try (final AzureBlobFileSystem fs = getFileSystem(config)) { - assumeDfsServiceType(); + Configuration configuration = new Configuration(getRawConfiguration()); + configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (AzureBlobFileSystem fs = getFileSystem(configuration)) { + assumeRecoveryThroughClientTransactionID(fs, true); AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); AbfsDfsClient abfsClient = (AbfsDfsClient) Mockito.spy( @@ -270,10 +271,8 @@ public void answer(final AbfsRestOperation mockedObj, */ @Test public void getClientTransactionIdAfterCreate() throws Exception { - Configuration config = new Configuration(this.getRawConfiguration()); - config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); - try (final AzureBlobFileSystem fs = getFileSystem(config)) { - assumeDfsServiceType(); + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(fs, true); AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); final Path nonOverwriteFile = new Path( "/NonOverwriteTest_FileName_" + UUID.randomUUID()); @@ -288,4 +287,37 @@ public void getClientTransactionIdAfterCreate() throws Exception { .isNotNull(); } } + + /** + * Test to verify that the client transaction ID is included in the response header + * after two consecutive create operations on the same file in Azure Blob Storage. + *

+ * This test ensures that even after performing two create operations (with overwrite) + * on the same file, the Azure Blob FileSystem client includes the client transaction ID + * in the response header for the created file. The test checks for the presence of + * the client transaction ID in the response after the second create call. + *

+ * + * @throws Exception if any error occurs during test execution + */ + @Test + public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(fs, true); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); + Path testPath = path("testfile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.create(testPath, false); + fs.create(testPath, true); + final AbfsHttpOperation getPathStatusOp = + abfsDfsClient.getPathStatus(testPath.toUri().getPath(), false, + getTestTracingContext(fs, true), null).getResult(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be set during create") + .isNotNull(); + } + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 1ed9c075a2768..ba5baa16a754c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1659,10 +1659,10 @@ public void testRenameSrcDirDeleteEmitDeletionCountInClientRequestId() */ @Test public void renamePathRetryIdempotency() throws Exception { - Configuration config = new Configuration(this.getRawConfiguration()); - config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); - try (final AzureBlobFileSystem fs = getFileSystem(config)) { - assumeDfsServiceType(); + Configuration configuration = new Configuration(getRawConfiguration()); + configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(fs, false); AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); fs.getAbfsStore().setClient(abfsClient); Path sourceDir = path("/testSrc"); @@ -1736,10 +1736,8 @@ public void answer(final AbfsRestOperation mockedObj, */ @Test public void getClientTransactionIdAfterRename() throws Exception { - Configuration config = new Configuration(this.getRawConfiguration()); - config.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); - try (final AzureBlobFileSystem fs = getFileSystem(config)) { - assumeDfsServiceType(); + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(fs, false); AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); Path sourceDir = path("/testSrc"); assertMkdirs(fs, sourceDir); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java index c8efa0f3b6878..8c918d8f1c697 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java @@ -167,6 +167,7 @@ public static void mockAbfsOperationCreation(final AbfsClient abfsClient, return null; }).when(op) .execute(any()); + Mockito.doReturn(true).when(op).isARetriedRequest(); return op; }).when(abfsClient) .getAbfsRestOperation(any(), any(), any(), any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index 12d20a65e347e..ff52072876a0b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -78,7 +78,7 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { public TestAbfsRenameRetryRecovery() throws Exception { isNamespaceEnabled = getConfiguration() .getBoolean(TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); - Assume.assumeTrue(getConfiguration().getIsClientTransactionIdEnabled()); + Assume.assumeFalse(getConfiguration().getIsClientTransactionIdEnabled()); } /** @@ -276,15 +276,9 @@ public void testRenameRecoveryEtagMatchFsLevel() throws IOException { // 5 calls should have happened in total for rename // 1 -> original rename rest call, 2 -> first retry, // +2 for getPathStatus calls - Long expectedConnectionsMade = 5 + connMadeBeforeRename; - if (fs.getAbfsStore().getAbfsConfiguration() - .getIsClientTransactionIdEnabled()) { - // 1 extra connection for the HEAD request to get the source path status - expectedConnectionsMade++; - } assertThatStatisticCounter(ioStats, - CONNECTIONS_MADE.getStatName()) - .isEqualTo(expectedConnectionsMade); + CONNECTIONS_MADE.getStatName()) + .isEqualTo(5 + connMadeBeforeRename); // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 // retries happen internally within AbfsRestOperation execute() // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called @@ -360,15 +354,9 @@ public void testRenameRecoveryFailsForDirFsLevel() throws Exception { // 1 -> original rename rest call, 2 -> first retry, // +1 for getPathStatus calls // last getPathStatus call should be skipped - Long expectedConnectionsMade = 4 + connMadeBeforeRename; - if (fs.getAbfsStore().getAbfsConfiguration() - .getIsClientTransactionIdEnabled()) { - // 1 extra connection for the HEAD request to get the source path status - expectedConnectionsMade++; - } assertThatStatisticCounter(ioStats, - CONNECTIONS_MADE.getStatName()) - .isEqualTo(expectedConnectionsMade); + CONNECTIONS_MADE.getStatName()) + .isEqualTo(4 + connMadeBeforeRename); From c05dd77483795b7442c42e7e6bf31d4fd067f7bb Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Sun, 16 Feb 2025 04:07:17 -0800 Subject: [PATCH 05/15] Reverted non-mandatory changes --- .../azurebfs/ITestAzureBlobFileSystemCreate.java | 2 +- .../apache/hadoop/fs/azurebfs/MockIntercept.java | 2 +- .../services/TestAbfsRenameRetryRecovery.java | 14 ++++++-------- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index e8afe36c5c331..b3314894d3f99 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -496,6 +496,7 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) // Only single tryGetFileStatus should happen // 1. getFileStatus on DFS endpoint : 1 // getFileStatus on Blob endpoint: 1 (No Additional List blob call as file exists) + createRequestCount += ( client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 2 @@ -551,7 +552,6 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) } else { createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs)) ? 2 : 1; - } assertAbfsStatistics( diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java index 43a4873c6cfd6..2c44ea896deca 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/MockIntercept.java @@ -40,4 +40,4 @@ public interface MockIntercept { * mock operation handling */ void answer(T mockedObj, InvocationOnMock answer) throws AbfsRestOperationException; -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index ff52072876a0b..cbd13673b3add 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -273,12 +273,12 @@ public void testRenameRecoveryEtagMatchFsLevel() throws IOException { fs.rename(new Path(path1), new Path(path2)); // validating stat counters after rename - // 5 calls should have happened in total for rename + // 4 calls should have happened in total for rename // 1 -> original rename rest call, 2 -> first retry, // +2 for getPathStatus calls assertThatStatisticCounter(ioStats, - CONNECTIONS_MADE.getStatName()) - .isEqualTo(5 + connMadeBeforeRename); + CONNECTIONS_MADE.getStatName()) + .isEqualTo(5 + connMadeBeforeRename); // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 // retries happen internally within AbfsRestOperation execute() // the stat for RENAME_PATH_ATTEMPTS is updated only once before execute() is called @@ -350,15 +350,13 @@ public void testRenameRecoveryFailsForDirFsLevel() throws Exception { assertEquals(false, renameResult); // validating stat counters after rename - // 4 calls should have happened in total for rename + // 3 calls should have happened in total for rename // 1 -> original rename rest call, 2 -> first retry, // +1 for getPathStatus calls // last getPathStatus call should be skipped assertThatStatisticCounter(ioStats, - CONNECTIONS_MADE.getStatName()) - .isEqualTo(4 + connMadeBeforeRename); - - + CONNECTIONS_MADE.getStatName()) + .isEqualTo(4 + connMadeBeforeRename); // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 // retries happen internally within AbfsRestOperation execute() From 73743981341184e0a690bfa050f9ca27c7693169 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Sun, 16 Feb 2025 06:17:46 -0800 Subject: [PATCH 06/15] Checkstyle fixes --- .../apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index c4ccc1813dd6a..3458d496c6e52 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -417,10 +417,8 @@ public AbfsRestOperation createPath(final String path, } else { // recovery using client transaction id only if it is a retried request. if (op.isARetriedRequest() && clientTransactionId != null - && (op.getResult().getStatusCode() == - HttpURLConnection.HTTP_CONFLICT - || op.getResult().getStatusCode() == - HttpURLConnection.HTTP_PRECON_FAILED)) { + && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT + || op.getResult().getStatusCode() == HttpURLConnection.HTTP_PRECON_FAILED)) { try { final AbfsHttpOperation getPathStatusOp = getPathStatus(path, false, From c3f789f99402ff1153995f9b159161e92485aa0e Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Mon, 17 Feb 2025 11:02:56 -0800 Subject: [PATCH 07/15] Mocked addClientTransactionIdToHeader method --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 3 +- .../constants/FileSystemConfigurations.java | 2 +- .../fs/azurebfs/services/AbfsDfsClient.java | 4 +- .../azurebfs/AbstractAbfsIntegrationTest.java | 31 +++++++++-- .../ITestAzureBlobFileSystemMkDir.java | 52 ++++++++++++++----- .../ITestAzureBlobFileSystemRename.java | 17 ++++-- 6 files changed, 84 insertions(+), 25 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index fd627ff30af55..867edfd1438fe 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -54,6 +54,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.VisibleForTesting; @@ -1875,7 +1876,7 @@ private long extractContentLength(AbfsHttpOperation op) { long contentLength; String contentLengthHeader = op.getResponseHeader( HttpHeaderConfigurations.CONTENT_LENGTH); - if (!Strings.isNullOrEmpty(contentLengthHeader)) { + if (!StringUtils.isEmpty(contentLengthHeader)) { contentLength = Long.parseLong(contentLengthHeader); } else { contentLength = 0; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 6072fcbb6fa08..ea982f8a35f34 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -198,7 +198,7 @@ public final class FileSystemConfigurations { public static final int DEFAULT_FS_AZURE_BLOB_DELETE_THREAD = DEFAULT_FS_AZURE_LISTING_ACTION_THREADS; - public static final boolean DEFAULT_FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID = false; + public static final boolean DEFAULT_FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID = true; private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 3458d496c6e52..f57e6a022d5b9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -40,6 +40,7 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -1635,7 +1636,8 @@ private Hashtable parseCommaSeparatedXmsProperties(String xMsPro * * @return client transaction id */ - private String addClientTransactionIdToHeader(List requestHeaders) { + @VisibleForTesting + public String addClientTransactionIdToHeader(List requestHeaders) { String clientTransactionId = null; // Set client transaction ID if the namespace and client transaction ID config are enabled. if (getIsNamespaceEnabled() && getAbfsConfiguration().getIsClientTransactionIdEnabled()) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 647364c5fdce0..452f042bb7f03 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -32,6 +32,7 @@ import org.junit.After; import org.junit.Assume; import org.junit.Before; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,6 +48,7 @@ import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; @@ -71,6 +73,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DFS_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.FILE_SYSTEM_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.*; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -757,12 +760,30 @@ protected void assumeRecoveryThroughClientTransactionID( if (isCreate) { // Assume that create client is DFS client. Assume.assumeTrue( - fs.getAbfsStore().getClientHandler().getIngressClient() - instanceof AbfsDfsClient); + AbfsServiceType.DFS.equals( + fs.getAbfsStore().getAbfsConfiguration().getIngressServiceType())); // Assume that append blob is not enabled in DFS client. - Assume.assumeTrue( - StringUtils.isEmpty( - fs.getAbfsStore().getAbfsConfiguration().getAppendBlobDirs())); + Assume.assumeFalse(isAppendBlobEnabled()); } } + + /** + * Mocks the behavior of adding a client transaction ID to the request headers + * for the given AzureBlobFileSystem. This method generates a random transaction ID + * and adds it to the headers of the {@link AbfsDfsClient}. + * + * @param abfsDfsClient The {@link AbfsDfsClient} mocked AbfsDfsClient. + * @param clientTransactionId An array to hold the generated transaction ID. + */ + protected void mockAddClientTransactionIdToHeader(AbfsDfsClient abfsDfsClient, + String[] clientTransactionId) { + Mockito.doAnswer( addClientTransactionId -> { + clientTransactionId[0] = UUID.randomUUID().toString(); + List headers = addClientTransactionId.getArgument(0); + headers.add( + new AbfsHttpHeader(X_MS_CLIENT_TRANSACTION_ID, + clientTransactionId[0])); + return clientTransactionId[0]; + }).when(abfsDfsClient).addClientTransactionIdToHeader(Mockito.anyList()); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index 8913bb61a084f..be1f7d236e020 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -48,6 +48,8 @@ import static java.net.HttpURLConnection.HTTP_CONFLICT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE; @@ -201,13 +203,7 @@ public void createPathRetryIdempotency() throws Exception { configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); try (AzureBlobFileSystem fs = getFileSystem(configuration)) { assumeRecoveryThroughClientTransactionID(fs, true); - AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); - AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); - AbfsDfsClient abfsClient = (AbfsDfsClient) Mockito.spy( - clientHandler.getClient()); - fs.getAbfsStore().setClient(abfsClient); - fs.getAbfsStore().setClientHandler(clientHandler); - Mockito.doReturn(abfsClient).when(clientHandler).getIngressClient(); + AbfsDfsClient abfsClient = mockIngressClientHandler(fs); final Path nonOverwriteFile = new Path( "/NonOverwriteTest_FileName_" + UUID.randomUUID()); final List headers = new ArrayList<>(); @@ -222,14 +218,14 @@ public void answer(final AbfsRestOperation mockedObj, if (count == 0) { count = 1; AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn("PUT").when(op).getMethod(); - Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); + Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); Mockito.doReturn(true).when(mockedObj).hasResult(); Mockito.doReturn(op).when(mockedObj).getResult(); Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); headers.addAll(mockedObj.getRequestHeaders()); throw new AbfsRestOperationException(HTTP_CONFLICT, - AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), "", + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), EMPTY_STRING, null, op); } } @@ -273,7 +269,9 @@ public void answer(final AbfsRestOperation mockedObj, public void getClientTransactionIdAfterCreate() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(fs, true); - AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); + final String[] clientTransactionId = new String[1]; + AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); final Path nonOverwriteFile = new Path( "/NonOverwriteTest_FileName_" + UUID.randomUUID()); fs.create(nonOverwriteFile, false); @@ -285,6 +283,10 @@ public void getClientTransactionIdAfterCreate() throws Exception { getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) .describedAs("Client transaction ID should be set during create") .isNotNull(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be equal to the one set in the header") + .isEqualTo(clientTransactionId[0]); } } @@ -304,12 +306,14 @@ public void getClientTransactionIdAfterCreate() throws Exception { public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(fs, true); - AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); + final String[] clientTransactionId = new String[1]; + AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); Path testPath = path("testfile"); AzureBlobFileSystemStore.Permissions permissions = new AzureBlobFileSystemStore.Permissions(false, FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); - fs.create(testPath, false); + fs.create(testPath, false); //5ff449d1-b5d2-478c-9722-8e26ebb5501e fs.create(testPath, true); final AbfsHttpOperation getPathStatusOp = abfsDfsClient.getPathStatus(testPath.toUri().getPath(), false, @@ -318,6 +322,28 @@ public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) .describedAs("Client transaction ID should be set during create") .isNotNull(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be equal to the one set in the header") + .isEqualTo(clientTransactionId[0]); } } + + /** + * Mocks and returns an instance of {@link AbfsDfsClient} for the given AzureBlobFileSystem. + * This method sets up the necessary mock behavior for the client handler and ingress client. + * + * @param fs The {@link AzureBlobFileSystem} instance for which the client handler will be mocked. + * @return A mocked {@link AbfsDfsClient} instance associated with the provided file system. + */ + private AbfsDfsClient mockIngressClientHandler(AzureBlobFileSystem fs) { + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy( + clientHandler.getClient()); + fs.getAbfsStore().setClient(abfsDfsClient); + fs.getAbfsStore().setClientHandler(clientHandler); + Mockito.doReturn(abfsDfsClient).when(clientHandler).getIngressClient(); + return abfsDfsClient; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index ba5baa16a754c..fbe5b6e510d71 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -74,6 +74,8 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_FAILED; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_PENDING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; @@ -1683,8 +1685,8 @@ public void answer(final AbfsRestOperation mockedObj, if (count == 0) { count = 1; AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn("PUT").when(op).getMethod(); - Mockito.doReturn("").when(op).getStorageErrorMessage(); + Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); + Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) .getStorageErrorCode(); Mockito.doReturn(true).when(mockedObj).hasResult(); @@ -1692,7 +1694,7 @@ public void answer(final AbfsRestOperation mockedObj, Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); headers.addAll(mockedObj.getRequestHeaders()); throw new AbfsRestOperationException(HTTP_NOT_FOUND, - SOURCE_PATH_NOT_FOUND.getErrorCode(), "", null, op); + SOURCE_PATH_NOT_FOUND.getErrorCode(), EMPTY_STRING, null, op); } } }); @@ -1738,7 +1740,10 @@ public void answer(final AbfsRestOperation mockedObj, public void getClientTransactionIdAfterRename() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(fs, false); - AbfsDfsClient abfsDfsClient = (AbfsDfsClient) fs.getAbfsClient(); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy(fs.getAbfsClient()); + fs.getAbfsStore().setClient(abfsDfsClient); + final String[] clientTransactionId = new String[1]; + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); Path sourceDir = path("/testSrc"); assertMkdirs(fs, sourceDir); String filename = "file1"; @@ -1754,6 +1759,10 @@ public void getClientTransactionIdAfterRename() throws Exception { getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) .describedAs("Client transaction id should be present in dest file") .isNotNull(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be equal to the one set in the header") + .isEqualTo(clientTransactionId[0]); } } } From c91031776b11a54bc65bf4a1ec05977efb270cb8 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 18 Feb 2025 00:34:50 -0800 Subject: [PATCH 08/15] Checkstyle Fix --- .../hadoop/fs/azurebfs/constants/FileSystemConfigurations.java | 2 +- .../apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index ea982f8a35f34..6072fcbb6fa08 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -198,7 +198,7 @@ public final class FileSystemConfigurations { public static final int DEFAULT_FS_AZURE_BLOB_DELETE_THREAD = DEFAULT_FS_AZURE_LISTING_ACTION_THREADS; - public static final boolean DEFAULT_FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID = true; + public static final boolean DEFAULT_FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID = false; private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 452f042bb7f03..02619e155ba8d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -777,7 +777,7 @@ protected void assumeRecoveryThroughClientTransactionID( */ protected void mockAddClientTransactionIdToHeader(AbfsDfsClient abfsDfsClient, String[] clientTransactionId) { - Mockito.doAnswer( addClientTransactionId -> { + Mockito.doAnswer(addClientTransactionId -> { clientTransactionId[0] = UUID.randomUUID().toString(); List headers = addClientTransactionId.getArgument(0); headers.add( From 7a0017b02c74e0024e0c803b561750294b55050f Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Sun, 23 Feb 2025 23:46:59 -0800 Subject: [PATCH 09/15] Moved Create idempotency test cases to create class --- .../azurebfs/AbstractAbfsIntegrationTest.java | 39 +--- .../ITestAzureBlobFileSystemCreate.java | 168 ++++++++++++++++ .../ITestAzureBlobFileSystemMkDir.java | 179 ------------------ .../ITestAzureBlobFileSystemRename.java | 5 +- .../azurebfs/services/AbfsClientTestUtil.java | 22 +++ 5 files changed, 198 insertions(+), 215 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 02619e155ba8d..3372cadefe1a9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -32,7 +32,6 @@ import org.junit.After; import org.junit.Assume; import org.junit.Before; -import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,8 +46,6 @@ import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; @@ -73,7 +70,6 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DFS_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; -import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.FILE_SYSTEM_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.*; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -744,46 +740,21 @@ protected void checkFuturesForExceptions(List> futures, int exceptionV * Service type is DFS. * Assumes that the client transaction ID is enabled in the configuration. * - * @param fs the AzureBlobFileSystem instance to check - * @throws AzureBlobFileSystemException in case of an error + * @throws IOException in case of an error */ - protected void assumeRecoveryThroughClientTransactionID( - AzureBlobFileSystem fs, boolean isCreate) - throws AzureBlobFileSystemException { + protected void assumeRecoveryThroughClientTransactionID(boolean isCreate) + throws IOException { // Assumes that recovery through client transaction ID is enabled. Assume.assumeTrue(getConfiguration().getIsClientTransactionIdEnabled()); // Assumes that service type is DFS. assumeDfsServiceType(); // Assumes that namespace is enabled for the given AzureBlobFileSystem. - Assume.assumeTrue( - fs.getIsNamespaceEnabled(getTestTracingContext(fs, true))); + assumeHnsEnabled(); if (isCreate) { // Assume that create client is DFS client. - Assume.assumeTrue( - AbfsServiceType.DFS.equals( - fs.getAbfsStore().getAbfsConfiguration().getIngressServiceType())); + Assume.assumeTrue(AbfsServiceType.DFS.equals(getIngressServiceType())); // Assume that append blob is not enabled in DFS client. Assume.assumeFalse(isAppendBlobEnabled()); } } - - /** - * Mocks the behavior of adding a client transaction ID to the request headers - * for the given AzureBlobFileSystem. This method generates a random transaction ID - * and adds it to the headers of the {@link AbfsDfsClient}. - * - * @param abfsDfsClient The {@link AbfsDfsClient} mocked AbfsDfsClient. - * @param clientTransactionId An array to hold the generated transaction ID. - */ - protected void mockAddClientTransactionIdToHeader(AbfsDfsClient abfsDfsClient, - String[] clientTransactionId) { - Mockito.doAnswer(addClientTransactionId -> { - clientTransactionId[0] = UUID.randomUUID().toString(); - List headers = addClientTransactionId.getArgument(0); - headers.add( - new AbfsHttpHeader(X_MS_CLIENT_TRANSACTION_ID, - clientTransactionId[0])); - return clientTransactionId[0]; - }).when(abfsDfsClient).addClientTransactionIdToHeader(Mockito.anyList()); - } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index b3314894d3f99..b2dc7b21bc7d8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -52,15 +52,19 @@ import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConcurrentWriteOperationDetectedException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.services.RenameAtomicity; +import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.DirectoryStateHelper; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; @@ -76,10 +80,15 @@ import static java.net.HttpURLConnection.HTTP_OK; import static java.net.HttpURLConnection.HTTP_PRECON_FAILED; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; +import static org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil.mockAddClientTransactionIdToHeader; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -2062,4 +2071,163 @@ private String extractFileEtag(String fileName) throws IOException { op = client.getPathStatus(fileName, true, testTracingContext, null); return AzureBlobFileSystemStore.extractEtagHeader(op.getResult()); } + + /** + * Tests the idempotency of creating a path with retries by simulating + * a conflict response (HTTP 409) from the Azure Blob File System client. + * The method ensures that the path creation operation retries correctly + * with the proper transaction ID headers, verifying idempotency during + * failure recovery. + * + * @throws Exception if any error occurs during the operation. + */ + @Test + public void createPathRetryIdempotency() throws Exception { + Configuration configuration = new Configuration(getRawConfiguration()); + configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); + try (AzureBlobFileSystem fs = getFileSystem(configuration)) { + assumeRecoveryThroughClientTransactionID(true); + AbfsDfsClient abfsClient = mockIngressClientHandler(fs); + final Path nonOverwriteFile = new Path( + "/NonOverwriteTest_FileName_" + UUID.randomUUID()); + final List headers = new ArrayList<>(); + TestAbfsClient.mockAbfsOperationCreation(abfsClient, + new MockIntercept() { + private int count = 0; + + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) + throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); + Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), EMPTY_STRING, + null, op); + } + } + }); + AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doAnswer(answer -> { + String requiredHeader = null; + for (AbfsHttpHeader httpHeader : headers) { + if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase( + httpHeader.getName())) { + requiredHeader = httpHeader.getValue(); + break; + } + } + return requiredHeader; + }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); + Mockito.doReturn(true).when(getPathRestOp).hasResult(); + Mockito.doReturn(op).when(getPathRestOp).getResult(); + Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); + fs.create(nonOverwriteFile, false); + } + } + + /** + * Test to verify that the client transaction ID is included in the response header + * during the creation of a new file in Azure Blob Storage. + *

+ * This test ensures that when a new file is created, the Azure Blob FileSystem client + * correctly includes the client transaction ID in the response header for the created file. + * The test uses a configuration where client transaction ID is enabled and verifies + * its presence after the file creation operation. + *

+ * + * @throws Exception if any error occurs during test execution + */ + @Test + public void getClientTransactionIdAfterCreate() throws Exception { + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(true); + final String[] clientTransactionId = new String[1]; + AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); + final Path nonOverwriteFile = new Path( + "/NonOverwriteTest_FileName_" + UUID.randomUUID()); + fs.create(nonOverwriteFile, false); + + final AbfsHttpOperation getPathStatusOp = + abfsDfsClient.getPathStatus(nonOverwriteFile.toUri().getPath(), false, + getTestTracingContext(fs, true), null).getResult(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be set during create") + .isNotNull(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be equal to the one set in the header") + .isEqualTo(clientTransactionId[0]); + } + } + + /** + * Test to verify that the client transaction ID is included in the response header + * after two consecutive create operations on the same file in Azure Blob Storage. + *

+ * This test ensures that even after performing two create operations (with overwrite) + * on the same file, the Azure Blob FileSystem client includes the client transaction ID + * in the response header for the created file. The test checks for the presence of + * the client transaction ID in the response after the second create call. + *

+ * + * @throws Exception if any error occurs during test execution + */ + @Test + public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(true); + final String[] clientTransactionId = new String[1]; + AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); + Path testPath = path("testfile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.create(testPath, false); //5ff449d1-b5d2-478c-9722-8e26ebb5501e + fs.create(testPath, true); + final AbfsHttpOperation getPathStatusOp = + abfsDfsClient.getPathStatus(testPath.toUri().getPath(), false, + getTestTracingContext(fs, true), null).getResult(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be set during create") + .isNotNull(); + Assertions.assertThat( + getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) + .describedAs("Client transaction ID should be equal to the one set in the header") + .isEqualTo(clientTransactionId[0]); + } + } + + /** + * Mocks and returns an instance of {@link AbfsDfsClient} for the given AzureBlobFileSystem. + * This method sets up the necessary mock behavior for the client handler and ingress client. + * + * @param fs The {@link AzureBlobFileSystem} instance for which the client handler will be mocked. + * @return A mocked {@link AbfsDfsClient} instance associated with the provided file system. + */ + private AbfsDfsClient mockIngressClientHandler(AzureBlobFileSystem fs) { + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy( + clientHandler.getClient()); + fs.getAbfsStore().setClient(abfsDfsClient); + fs.getAbfsStore().setClientHandler(clientHandler); + Mockito.doReturn(abfsDfsClient).when(clientHandler).getIngressClient(); + return abfsDfsClient; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index be1f7d236e020..e54b98e0b7a6e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -18,42 +18,22 @@ package org.apache.hadoop.fs.azurebfs; -import java.util.ArrayList; -import java.util.List; import java.util.UUID; -import org.assertj.core.api.Assertions; import org.junit.Assume; import org.junit.Test; import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; -import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; -import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; -import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; -import org.apache.hadoop.fs.azurebfs.utils.TracingContext; -import org.apache.hadoop.fs.permission.FsPermission; -import static java.net.HttpURLConnection.HTTP_CONFLICT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE; -import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -187,163 +167,4 @@ public void testMkdirWithExistingFilename() throws Exception { intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath"))); intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath/newDir"))); } - - /** - * Tests the idempotency of creating a path with retries by simulating - * a conflict response (HTTP 409) from the Azure Blob File System client. - * The method ensures that the path creation operation retries correctly - * with the proper transaction ID headers, verifying idempotency during - * failure recovery. - * - * @throws Exception if any error occurs during the operation. - */ - @Test - public void createPathRetryIdempotency() throws Exception { - Configuration configuration = new Configuration(getRawConfiguration()); - configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); - try (AzureBlobFileSystem fs = getFileSystem(configuration)) { - assumeRecoveryThroughClientTransactionID(fs, true); - AbfsDfsClient abfsClient = mockIngressClientHandler(fs); - final Path nonOverwriteFile = new Path( - "/NonOverwriteTest_FileName_" + UUID.randomUUID()); - final List headers = new ArrayList<>(); - TestAbfsClient.mockAbfsOperationCreation(abfsClient, - new MockIntercept() { - private int count = 0; - - @Override - public void answer(final AbfsRestOperation mockedObj, - final InvocationOnMock answer) - throws AbfsRestOperationException { - if (count == 0) { - count = 1; - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); - Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); - Mockito.doReturn(true).when(mockedObj).hasResult(); - Mockito.doReturn(op).when(mockedObj).getResult(); - Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); - headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_CONFLICT, - AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), EMPTY_STRING, - null, op); - } - } - }); - AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doAnswer(answer -> { - String requiredHeader = null; - for (AbfsHttpHeader httpHeader : headers) { - if (X_MS_CLIENT_TRANSACTION_ID.equalsIgnoreCase( - httpHeader.getName())) { - requiredHeader = httpHeader.getValue(); - break; - } - } - return requiredHeader; - }).when(op).getResponseHeader(X_MS_CLIENT_TRANSACTION_ID); - Mockito.doReturn(true).when(getPathRestOp).hasResult(); - Mockito.doReturn(op).when(getPathRestOp).getResult(); - Mockito.doReturn(getPathRestOp).when(abfsClient).getPathStatus( - Mockito.nullable(String.class), Mockito.nullable(Boolean.class), - Mockito.nullable(TracingContext.class), - Mockito.nullable(ContextEncryptionAdapter.class)); - fs.create(nonOverwriteFile, false); - } - } - - /** - * Test to verify that the client transaction ID is included in the response header - * during the creation of a new file in Azure Blob Storage. - *

- * This test ensures that when a new file is created, the Azure Blob FileSystem client - * correctly includes the client transaction ID in the response header for the created file. - * The test uses a configuration where client transaction ID is enabled and verifies - * its presence after the file creation operation. - *

- * - * @throws Exception if any error occurs during test execution - */ - @Test - public void getClientTransactionIdAfterCreate() throws Exception { - try (AzureBlobFileSystem fs = getFileSystem()) { - assumeRecoveryThroughClientTransactionID(fs, true); - final String[] clientTransactionId = new String[1]; - AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); - mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); - final Path nonOverwriteFile = new Path( - "/NonOverwriteTest_FileName_" + UUID.randomUUID()); - fs.create(nonOverwriteFile, false); - - final AbfsHttpOperation getPathStatusOp = - abfsDfsClient.getPathStatus(nonOverwriteFile.toUri().getPath(), false, - getTestTracingContext(fs, true), null).getResult(); - Assertions.assertThat( - getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) - .describedAs("Client transaction ID should be set during create") - .isNotNull(); - Assertions.assertThat( - getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) - .describedAs("Client transaction ID should be equal to the one set in the header") - .isEqualTo(clientTransactionId[0]); - } - } - - /** - * Test to verify that the client transaction ID is included in the response header - * after two consecutive create operations on the same file in Azure Blob Storage. - *

- * This test ensures that even after performing two create operations (with overwrite) - * on the same file, the Azure Blob FileSystem client includes the client transaction ID - * in the response header for the created file. The test checks for the presence of - * the client transaction ID in the response after the second create call. - *

- * - * @throws Exception if any error occurs during test execution - */ - @Test - public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { - try (AzureBlobFileSystem fs = getFileSystem()) { - assumeRecoveryThroughClientTransactionID(fs, true); - final String[] clientTransactionId = new String[1]; - AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); - mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); - Path testPath = path("testfile"); - AzureBlobFileSystemStore.Permissions permissions - = new AzureBlobFileSystemStore.Permissions(false, - FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); - fs.create(testPath, false); //5ff449d1-b5d2-478c-9722-8e26ebb5501e - fs.create(testPath, true); - final AbfsHttpOperation getPathStatusOp = - abfsDfsClient.getPathStatus(testPath.toUri().getPath(), false, - getTestTracingContext(fs, true), null).getResult(); - Assertions.assertThat( - getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) - .describedAs("Client transaction ID should be set during create") - .isNotNull(); - Assertions.assertThat( - getPathStatusOp.getResponseHeader(X_MS_CLIENT_TRANSACTION_ID)) - .describedAs("Client transaction ID should be equal to the one set in the header") - .isEqualTo(clientTransactionId[0]); - } - } - - /** - * Mocks and returns an instance of {@link AbfsDfsClient} for the given AzureBlobFileSystem. - * This method sets up the necessary mock behavior for the client handler and ingress client. - * - * @param fs The {@link AzureBlobFileSystem} instance for which the client handler will be mocked. - * @return A mocked {@link AbfsDfsClient} instance associated with the provided file system. - */ - private AbfsDfsClient mockIngressClientHandler(AzureBlobFileSystem fs) { - AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); - AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler()); - AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy( - clientHandler.getClient()); - fs.getAbfsStore().setClient(abfsDfsClient); - fs.getAbfsStore().setClientHandler(clientHandler); - Mockito.doReturn(abfsDfsClient).when(clientHandler).getIngressClient(); - return abfsDfsClient; - } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index fbe5b6e510d71..641b50e716f6f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -84,6 +84,7 @@ import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil.mockAddClientTransactionIdToHeader; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; @@ -1664,7 +1665,7 @@ public void renamePathRetryIdempotency() throws Exception { Configuration configuration = new Configuration(getRawConfiguration()); configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); try (AzureBlobFileSystem fs = getFileSystem()) { - assumeRecoveryThroughClientTransactionID(fs, false); + assumeRecoveryThroughClientTransactionID(false); AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); fs.getAbfsStore().setClient(abfsClient); Path sourceDir = path("/testSrc"); @@ -1739,7 +1740,7 @@ public void answer(final AbfsRestOperation mockedObj, @Test public void getClientTransactionIdAfterRename() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { - assumeRecoveryThroughClientTransactionID(fs, false); + assumeRecoveryThroughClientTransactionID(false); AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy(fs.getAbfsClient()); fs.getAbfsStore().setClient(abfsDfsClient); final String[] clientTransactionId = new String[1]; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index bc6c69ccc8fac..a92fd6f6ca173 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -28,6 +28,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.UUID; import java.util.concurrent.locks.ReentrantLock; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -51,6 +52,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.CONTENT_TYPE; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_MATCH; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_BLOB_CONTENT_MD5; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_CLOSE; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_COMP; import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.PutBlockList; @@ -337,4 +339,24 @@ public static void mockGetRenameBlobHandler(AbfsBlobClient blobClient, Mockito.nullable(String.class), Mockito.anyBoolean(), Mockito.any(TracingContext.class)); } + + /** + * Mocks the behavior of adding a client transaction ID to the request headers + * for the given AzureBlobFileSystem. This method generates a random transaction ID + * and adds it to the headers of the {@link AbfsDfsClient}. + * + * @param abfsDfsClient The {@link AbfsDfsClient} mocked AbfsDfsClient. + * @param clientTransactionId An array to hold the generated transaction ID. + */ + public static void mockAddClientTransactionIdToHeader(AbfsDfsClient abfsDfsClient, + String[] clientTransactionId) { + Mockito.doAnswer(addClientTransactionId -> { + clientTransactionId[0] = UUID.randomUUID().toString(); + List headers = addClientTransactionId.getArgument(0); + headers.add( + new AbfsHttpHeader(X_MS_CLIENT_TRANSACTION_ID, + clientTransactionId[0])); + return clientTransactionId[0]; + }).when(abfsDfsClient).addClientTransactionIdToHeader(Mockito.anyList()); + } } From 39f387e1e4b8b67f473b034dcbd0317ce3bd9585 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Mon, 24 Feb 2025 05:20:18 -0800 Subject: [PATCH 10/15] Dummy response incase of recovery --- .../exceptions/AbfsDriverException.java | 8 ++ .../fs/azurebfs/services/AbfsDfsClient.java | 72 +++++++------- .../ITestAzureBlobFileSystemCreate.java | 98 ++++++++++++++----- .../ITestAzureBlobFileSystemRename.java | 92 ++++++++++++----- .../fs/azurebfs/services/TestAbfsClient.java | 37 ++++--- 5 files changed, 206 insertions(+), 101 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsDriverException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsDriverException.java index a7635e893c5f9..7b2d03d692354 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsDriverException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsDriverException.java @@ -51,4 +51,12 @@ public AbfsDriverException(final Exception innerException, final String activity : ERROR_MESSAGE + ", rId: " + activityId, null); } + + public AbfsDriverException(final String errorMessage, final Exception innerException) { + super( + AzureServiceErrorCode.UNKNOWN.getStatusCode(), + AzureServiceErrorCode.UNKNOWN.getErrorCode(), + errorMessage, + innerException); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index a27ead9419012..40a221b88c342 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ApiVersion; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; @@ -415,7 +416,9 @@ public AbfsRestOperation createPath(final String path, String existingResource = op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); if (existingResource != null && existingResource.equals(DIRECTORY)) { - return op; //don't throw ex on mkdirs for existing directory + //don't throw ex on mkdirs for existing directory + return getSuccessOp(AbfsRestOperationType.CreatePath, + HTTP_METHOD_PUT, url, requestHeaders); } } } else { @@ -430,11 +433,13 @@ public AbfsRestOperation createPath(final String path, if (clientTransactionId.equals( getPathStatusOp.getResponseHeader( X_MS_CLIENT_TRANSACTION_ID))) { - return op; + return getSuccessOp(AbfsRestOperationType.CreatePath, + HTTP_METHOD_PUT, url, requestHeaders); } - } catch (AzureBlobFileSystemException ignored) { - // In case of get path status failure, - // we will throw the original exception. + } catch (AzureBlobFileSystemException exception) { + throw new AbfsDriverException( + "Error in getPathStatus while recovering from create failure.", + exception); } } } @@ -665,35 +670,7 @@ public AbfsClientRenameResult renamePath( String sourceEtag, boolean isMetadataIncompleteState) throws IOException { final List requestHeaders = createDefaultHeaders(); - - final boolean hasEtag = !isEmpty(sourceEtag); - boolean shouldAttemptRecovery = isRenameResilience() && getIsNamespaceEnabled(); - if (!hasEtag && shouldAttemptRecovery) { - // in case eTag is already not supplied to the API - // and rename resilience is expected and it is an HNS enabled account - // fetch the source etag to be used later in recovery - try { - final AbfsRestOperation srcStatusOp = getPathStatus(source, - false, tracingContext, null); - if (srcStatusOp.hasResult()) { - final AbfsHttpOperation result = srcStatusOp.getResult(); - sourceEtag = extractEtagHeader(result); - // and update the directory status. - boolean isDir = checkIsDir(result); - shouldAttemptRecovery = !isDir; - LOG.debug( - "Retrieved etag of source for rename recovery: {}; isDir={}", - sourceEtag, isDir); - } - } catch (AbfsRestOperationException e) { - throw new AbfsRestOperationException(e.getStatusCode(), - SOURCE_PATH_NOT_FOUND.getErrorCode(), - e.getMessage(), e); - } - - } - String encodedRenameSource = urlEncode( FORWARD_SLASH + this.getFileSystem() + source); if (getAuthType() == AuthType.SAS) { @@ -745,12 +722,15 @@ public AbfsClientRenameResult renamePath( if (clientTransactionId.equals( abfsHttpOperation.getResponseHeader( X_MS_CLIENT_TRANSACTION_ID))) { - return new AbfsClientRenameResult(op, true, + return new AbfsClientRenameResult( + getSuccessOp(AbfsRestOperationType.RenamePath, + HTTP_METHOD_PUT, url, requestHeaders), true, isMetadataIncompleteState); } - } catch (AbfsRestOperationException ignored) { - // In case of get path status failure, - // we will throw the original exception. + } catch (AzureBlobFileSystemException exception) { + throw new AbfsDriverException( + "Error in getPathStatus while recovering from rename failure.", + exception); } throw e; } @@ -1658,4 +1638,22 @@ public String addClientTransactionIdToHeader(List requestHeaders } return clientTransactionId; } + + /** + * Get the dummy success operation. + * @param operationType type of the operation + * @param httpMethod http method + * @param url url to be used + * @param requestHeaders list of headers to be sent with the request + * @return success operation + * @throws AzureBlobFileSystemException if rest operation fails. + */ + private AbfsRestOperation getSuccessOp(final AbfsRestOperationType operationType, + final String httpMethod, final URL url, + final List requestHeaders) throws AzureBlobFileSystemException { + final AbfsRestOperation successOp = getAbfsRestOperation( + operationType, httpMethod, url, requestHeaders); + successOp.hardSetResult(HttpURLConnection.HTTP_OK); + return successOp; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index b2dc7b21bc7d8..9f40673dd71b9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConcurrentWriteOperationDetectedException; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; @@ -74,6 +75,7 @@ import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.test.ReflectionUtils; +import static java.net.HttpURLConnection.HTTP_CLIENT_TIMEOUT; import static java.net.HttpURLConnection.HTTP_CONFLICT; import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; @@ -2091,29 +2093,7 @@ public void createPathRetryIdempotency() throws Exception { final Path nonOverwriteFile = new Path( "/NonOverwriteTest_FileName_" + UUID.randomUUID()); final List headers = new ArrayList<>(); - TestAbfsClient.mockAbfsOperationCreation(abfsClient, - new MockIntercept() { - private int count = 0; - - @Override - public void answer(final AbfsRestOperation mockedObj, - final InvocationOnMock answer) - throws AbfsRestOperationException { - if (count == 0) { - count = 1; - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); - Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); - Mockito.doReturn(true).when(mockedObj).hasResult(); - Mockito.doReturn(op).when(mockedObj).getResult(); - Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); - headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_CONFLICT, - AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), EMPTY_STRING, - null, op); - } - } - }); + mockRetriedRequest(abfsClient, headers); AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); Mockito.doAnswer(answer -> { @@ -2197,7 +2177,7 @@ public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { AzureBlobFileSystemStore.Permissions permissions = new AzureBlobFileSystemStore.Permissions(false, FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); - fs.create(testPath, false); //5ff449d1-b5d2-478c-9722-8e26ebb5501e + fs.create(testPath, false); fs.create(testPath, true); final AbfsHttpOperation getPathStatusOp = abfsDfsClient.getPathStatus(testPath.toUri().getPath(), false, @@ -2213,6 +2193,49 @@ public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { } } + /** + * Test to verify that the client transaction ID is included in the response header + * during the creation of a new file in Azure Blob Storage. + *

+ * This test ensures that when a new file is created, the Azure Blob FileSystem client + * correctly includes the client transaction ID in the response header for the created file. + * The test uses a configuration where client transaction ID is enabled and verifies + * its presence after the file creation operation. + *

+ * + * @throws Exception if any error occurs during test execution + */ + @Test + public void failureInGetPathStatusDuringCreateRecovery() throws Exception { + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(true); + final String[] clientTransactionId = new String[1]; + AbfsDfsClient abfsDfsClient = mockIngressClientHandler(fs); + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); + mockRetriedRequest(abfsDfsClient, new ArrayList<>()); + boolean[] flag = new boolean[1]; + Mockito.doAnswer(getPathStatus -> { + if (!flag[0]) { + flag[0] = true; + throw new AbfsRestOperationException(HTTP_CLIENT_TIMEOUT, "", "", new Exception()); + } + return getPathStatus.callRealMethod(); + }).when(abfsDfsClient).getPathStatus( + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); + + final Path nonOverwriteFile = new Path( + "/NonOverwriteTest_FileName_" + UUID.randomUUID()); + String errorMessage = intercept(AbfsDriverException.class, + () -> fs.create(nonOverwriteFile, false)).getErrorMessage(); + + Assertions.assertThat(errorMessage) + .describedAs("getPathStatus should fail while recovering") + .contains("Error in getPathStatus while recovering from create failure."); + } + } + /** * Mocks and returns an instance of {@link AbfsDfsClient} for the given AzureBlobFileSystem. * This method sets up the necessary mock behavior for the client handler and ingress client. @@ -2230,4 +2253,31 @@ private AbfsDfsClient mockIngressClientHandler(AzureBlobFileSystem fs) { Mockito.doReturn(abfsDfsClient).when(clientHandler).getIngressClient(); return abfsDfsClient; } + + private void mockRetriedRequest(AbfsDfsClient abfsDfsClient, + final List headers) throws Exception { + TestAbfsClient.mockAbfsOperationCreation(abfsDfsClient, + new MockIntercept() { + private int count = 0; + + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) + throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); + Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_CONFLICT).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), EMPTY_STRING, + null, op); + } + } + }); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 641b50e716f6f..34f70e3257b05 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; @@ -66,6 +67,7 @@ import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.functional.FunctionRaisingIOE; +import static java.net.HttpURLConnection.HTTP_CLIENT_TIMEOUT; import static java.net.HttpURLConnection.HTTP_CONFLICT; import static java.net.HttpURLConnection.HTTP_FORBIDDEN; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; @@ -1675,30 +1677,7 @@ public void renamePathRetryIdempotency() throws Exception { touch(sourceFilePath); Path destFilePath = new Path(sourceDir, "file2"); final List headers = new ArrayList<>(); - TestAbfsClient.mockAbfsOperationCreation(abfsClient, - new MockIntercept() { - private int count = 0; - - @Override - public void answer(final AbfsRestOperation mockedObj, - final InvocationOnMock answer) - throws AbfsRestOperationException { - if (count == 0) { - count = 1; - AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); - Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); - Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); - Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) - .getStorageErrorCode(); - Mockito.doReturn(true).when(mockedObj).hasResult(); - Mockito.doReturn(op).when(mockedObj).getResult(); - Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); - headers.addAll(mockedObj.getRequestHeaders()); - throw new AbfsRestOperationException(HTTP_NOT_FOUND, - SOURCE_PATH_NOT_FOUND.getErrorCode(), EMPTY_STRING, null, op); - } - } - }); + AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); Mockito.doAnswer(answer -> { @@ -1766,4 +1745,69 @@ public void getClientTransactionIdAfterRename() throws Exception { .isEqualTo(clientTransactionId[0]); } } + + @Test + public void failureInGetPathStatusDuringRenameRecovery() throws Exception { + try (AzureBlobFileSystem fs = getFileSystem()) { + assumeRecoveryThroughClientTransactionID(false); + AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy(fs.getAbfsClient()); + fs.getAbfsStore().setClient(abfsDfsClient); + final String[] clientTransactionId = new String[1]; + mockAddClientTransactionIdToHeader(abfsDfsClient, clientTransactionId); + mockRetriedRequest(abfsDfsClient, new ArrayList<>()); + boolean[] flag = new boolean[1]; + Mockito.doAnswer(getPathStatus -> { + if (!flag[0]) { + flag[0] = true; + throw new AbfsRestOperationException(HTTP_CLIENT_TIMEOUT, "", "", new Exception()); + } + return getPathStatus.callRealMethod(); + }).when(abfsDfsClient).getPathStatus( + Mockito.nullable(String.class), Mockito.nullable(Boolean.class), + Mockito.nullable(TracingContext.class), + Mockito.nullable(ContextEncryptionAdapter.class)); + + Path sourceDir = path("/testSrc"); + assertMkdirs(fs, sourceDir); + String filename = "file1"; + Path sourceFilePath = new Path(sourceDir, filename); + touch(sourceFilePath); + Path destFilePath = new Path(sourceDir, "file2"); + + String errorMessage = intercept(AbfsDriverException.class, + () -> fs.rename(sourceFilePath, destFilePath)).getErrorMessage(); + + Assertions.assertThat(errorMessage) + .describedAs("getPathStatus should fail while recovering") + .contains("Error in getPathStatus while recovering from rename failure."); + } + } + + private void mockRetriedRequest(AbfsDfsClient abfsDfsClient, + final List headers) throws Exception { + TestAbfsClient.mockAbfsOperationCreation(abfsDfsClient, + new MockIntercept() { + private int count = 0; + + @Override + public void answer(final AbfsRestOperation mockedObj, + final InvocationOnMock answer) + throws AbfsRestOperationException { + if (count == 0) { + count = 1; + AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); + Mockito.doReturn(HTTP_METHOD_PUT).when(op).getMethod(); + Mockito.doReturn(EMPTY_STRING).when(op).getStorageErrorMessage(); + Mockito.doReturn(SOURCE_PATH_NOT_FOUND.getErrorCode()).when(op) + .getStorageErrorCode(); + Mockito.doReturn(true).when(mockedObj).hasResult(); + Mockito.doReturn(op).when(mockedObj).getResult(); + Mockito.doReturn(HTTP_NOT_FOUND).when(op).getStatusCode(); + headers.addAll(mockedObj.getRequestHeaders()); + throw new AbfsRestOperationException(HTTP_NOT_FOUND, + SOURCE_PATH_NOT_FOUND.getErrorCode(), EMPTY_STRING, null, op); + } + } + }); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java index 8c918d8f1c697..2495e50f43475 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java @@ -152,23 +152,28 @@ private boolean isThreadRunning(String threadName) { */ public static void mockAbfsOperationCreation(final AbfsClient abfsClient, final MockIntercept mockIntercept) throws Exception { + boolean[] flag = new boolean[1]; Mockito.doAnswer(answer -> { - AbfsRestOperation op = Mockito.spy( - new AbfsRestOperation( - answer.getArgument(0), - abfsClient, - answer.getArgument(1), - answer.getArgument(2), - answer.getArgument(3), - abfsClient.getAbfsConfiguration() - )); - Mockito.doAnswer((answer1) -> { - mockIntercept.answer(op, answer1); - return null; - }).when(op) - .execute(any()); - Mockito.doReturn(true).when(op).isARetriedRequest(); - return op; + if (!flag[0]) { + flag[0] = true; + AbfsRestOperation op = Mockito.spy( + new AbfsRestOperation( + answer.getArgument(0), + abfsClient, + answer.getArgument(1), + answer.getArgument(2), + answer.getArgument(3), + abfsClient.getAbfsConfiguration() + )); + Mockito.doAnswer((answer1) -> { + mockIntercept.answer(op, answer1); + return null; + }).when(op) + .execute(any()); + Mockito.doReturn(true).when(op).isARetriedRequest(); + return op; + } + return answer.callRealMethod(); }).when(abfsClient) .getAbfsRestOperation(any(), any(), any(), any()); } From ba0b3a1a423d8635ac4f6738317750e3f109171e Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Mon, 24 Feb 2025 05:36:07 -0800 Subject: [PATCH 11/15] Java doc for test cases added --- .../ITestAzureBlobFileSystemCreate.java | 35 ++++++++++++------- .../ITestAzureBlobFileSystemRename.java | 19 ++++++++-- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 9f40673dd71b9..d546745404bf3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -2120,12 +2120,11 @@ public void createPathRetryIdempotency() throws Exception { /** * Test to verify that the client transaction ID is included in the response header * during the creation of a new file in Azure Blob Storage. - *

+ * * This test ensures that when a new file is created, the Azure Blob FileSystem client * correctly includes the client transaction ID in the response header for the created file. * The test uses a configuration where client transaction ID is enabled and verifies * its presence after the file creation operation. - *

* * @throws Exception if any error occurs during test execution */ @@ -2157,12 +2156,11 @@ public void getClientTransactionIdAfterCreate() throws Exception { /** * Test to verify that the client transaction ID is included in the response header * after two consecutive create operations on the same file in Azure Blob Storage. - *

+ * * This test ensures that even after performing two create operations (with overwrite) * on the same file, the Azure Blob FileSystem client includes the client transaction ID * in the response header for the created file. The test checks for the presence of * the client transaction ID in the response after the second create call. - *

* * @throws Exception if any error occurs during test execution */ @@ -2194,16 +2192,16 @@ public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { } /** - * Test to verify that the client transaction ID is included in the response header - * during the creation of a new file in Azure Blob Storage. - *

- * This test ensures that when a new file is created, the Azure Blob FileSystem client - * correctly includes the client transaction ID in the response header for the created file. - * The test uses a configuration where client transaction ID is enabled and verifies - * its presence after the file creation operation. - *

+ * Test case to simulate a failure scenario during the recovery process while + * creating a file in Azure Blob File System. This test verifies that when the + * `getPathStatus` method encounters a timeout exception during recovery, it + * triggers an appropriate failure response. * - * @throws Exception if any error occurs during test execution + * The test mocks the `AbfsDfsClient` to simulate the failure behavior, including + * a retry logic. It also verifies that an exception is correctly thrown and the + * error message contains the expected details for recovery failure. + * + * @throws Exception If an error occurs during the test setup or execution. */ @Test public void failureInGetPathStatusDuringCreateRecovery() throws Exception { @@ -2254,6 +2252,17 @@ private AbfsDfsClient mockIngressClientHandler(AzureBlobFileSystem fs) { return abfsDfsClient; } + /** + * Mocks the retry behavior for an AbfsDfsClient request. The method intercepts + * the Abfs operation and simulates an HTTP conflict (HTTP 409) error on the + * first invocation. It creates a mock HTTP operation with a PUT method and + * specific status codes and error messages. + * + * @param abfsDfsClient The AbfsDfsClient to mock operations for. + * @param headers The list of HTTP headers to which request headers will be added. + * + * @throws Exception If an error occurs during mock creation or operation execution. + */ private void mockRetriedRequest(AbfsDfsClient abfsDfsClient, final List headers) throws Exception { TestAbfsClient.mockAbfsOperationCreation(abfsDfsClient, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 34f70e3257b05..9b17ba2ee70bb 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1668,7 +1668,7 @@ public void renamePathRetryIdempotency() throws Exception { configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(false); - AbfsClient abfsClient = Mockito.spy(fs.getAbfsClient()); + AbfsDfsClient abfsClient = (AbfsDfsClient) Mockito.spy(fs.getAbfsClient()); fs.getAbfsStore().setClient(abfsClient); Path sourceDir = path("/testSrc"); assertMkdirs(fs, sourceDir); @@ -1676,7 +1676,9 @@ public void renamePathRetryIdempotency() throws Exception { Path sourceFilePath = new Path(sourceDir, filename); touch(sourceFilePath); Path destFilePath = new Path(sourceDir, "file2"); + final List headers = new ArrayList<>(); + mockRetriedRequest(abfsClient, headers); AbfsRestOperation getPathRestOp = Mockito.mock(AbfsRestOperation.class); AbfsHttpOperation op = Mockito.mock(AbfsHttpOperation.class); @@ -1707,12 +1709,11 @@ public void renamePathRetryIdempotency() throws Exception { /** * Test to verify that the client transaction ID is included in the response header * after renaming a file in Azure Blob Storage. - *

+ * * This test ensures that when a file is renamed, the Azure Blob FileSystem client * properly includes the client transaction ID in the response header for the renamed file. * The test uses a configuration where client transaction ID is enabled and verifies * its presence after performing a rename operation. - *

* * @throws Exception if any error occurs during test execution */ @@ -1746,6 +1747,7 @@ public void getClientTransactionIdAfterRename() throws Exception { } } + @Test public void failureInGetPathStatusDuringRenameRecovery() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { @@ -1783,6 +1785,17 @@ public void failureInGetPathStatusDuringRenameRecovery() throws Exception { } } + /** + * Mocks the retry behavior for an AbfsDfsClient request. The method intercepts + * the Abfs operation and simulates an HTTP conflict (HTTP 404) error on the + * first invocation. It creates a mock HTTP operation with a PUT method and + * specific status codes and error messages. + * + * @param abfsDfsClient The AbfsDfsClient to mock operations for. + * @param headers The list of HTTP headers to which request headers will be added. + * + * @throws Exception If an error occurs during mock creation or operation execution. + */ private void mockRetriedRequest(AbfsDfsClient abfsDfsClient, final List headers) throws Exception { TestAbfsClient.mockAbfsOperationCreation(abfsDfsClient, From c03a11329236743824093c5da5252a90ed37f47f Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Mon, 24 Feb 2025 08:15:30 -0800 Subject: [PATCH 12/15] Java doc added --- .../fs/azurebfs/ITestAzureBlobFileSystemRename.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 9b17ba2ee70bb..a0d5030ff774d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -1747,7 +1747,13 @@ public void getClientTransactionIdAfterRename() throws Exception { } } - + /** + * Tests the recovery process during a file rename operation in Azure Blob File System when + * the `getPathStatus` method encounters a timeout exception. The test ensures that the proper + * error message is returned when the operation fails during recovery. + * + * @throws Exception If an error occurs during the test setup or execution. + */ @Test public void failureInGetPathStatusDuringRenameRecovery() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { From b0ad61c8bffa38fbb8be5ea080aafe023c417d6a Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 25 Feb 2025 04:37:45 -0800 Subject: [PATCH 13/15] Changes as per comments --- .../fs/azurebfs/services/AbfsClient.java | 18 ++++++ .../fs/azurebfs/services/AbfsDfsClient.java | 62 +++++++++++-------- .../fs/azurebfs/services/AbfsErrors.java | 4 ++ .../ITestAzureBlobFileSystemCreate.java | 9 +-- .../ITestAzureBlobFileSystemRename.java | 9 +-- .../services/TestAbfsRenameRetryRecovery.java | 44 +++++++++---- 6 files changed, 100 insertions(+), 46 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index bf6d097dc5dcc..3e2ef4880613b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1738,4 +1738,22 @@ public abstract Hashtable getXMSProperties(AbfsHttpOperation res * @throws UnsupportedEncodingException if decoding fails */ public abstract String decodeAttribute(byte[] value) throws UnsupportedEncodingException; + + /** + * Get the dummy success operation. + * @param operationType type of the operation + * @param httpMethod http method + * @param url url to be used + * @param requestHeaders list of headers to be sent with the request + * @return success operation + * @throws AzureBlobFileSystemException if rest operation fails. + */ + protected AbfsRestOperation getSuccessOp(final AbfsRestOperationType operationType, + final String httpMethod, final URL url, + final List requestHeaders) throws AzureBlobFileSystemException { + final AbfsRestOperation successOp = getAbfsRestOperation( + operationType, httpMethod, url, requestHeaders); + successOp.hardSetResult(HttpURLConnection.HTTP_OK); + return successOp; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 40a221b88c342..05acaa78f489f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -137,7 +137,9 @@ import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.UNAUTHORIZED_BLOB_OVERWRITE; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_CREATE_RECOVERY; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_FILE_ALREADY_EXISTS; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_RENAME_RECOVERY; /** * AbfsClient interacting with the DFS Endpoint. @@ -437,9 +439,7 @@ public AbfsRestOperation createPath(final String path, HTTP_METHOD_PUT, url, requestHeaders); } } catch (AzureBlobFileSystemException exception) { - throw new AbfsDriverException( - "Error in getPathStatus while recovering from create failure.", - exception); + throw new AbfsDriverException(ERR_CREATE_RECOVERY, exception); } } } @@ -670,7 +670,35 @@ public AbfsClientRenameResult renamePath( String sourceEtag, boolean isMetadataIncompleteState) throws IOException { final List requestHeaders = createDefaultHeaders(); + + final boolean hasEtag = !isEmpty(sourceEtag); + boolean shouldAttemptRecovery = isRenameResilience() && getIsNamespaceEnabled(); + if (!hasEtag && shouldAttemptRecovery) { + // in case eTag is already not supplied to the API + // and rename resilience is expected and it is an HNS enabled account + // fetch the source etag to be used later in recovery + try { + final AbfsRestOperation srcStatusOp = getPathStatus(source, + false, tracingContext, null); + if (srcStatusOp.hasResult()) { + final AbfsHttpOperation result = srcStatusOp.getResult(); + sourceEtag = extractEtagHeader(result); + // and update the directory status. + boolean isDir = checkIsDir(result); + shouldAttemptRecovery = !isDir; + LOG.debug( + "Retrieved etag of source for rename recovery: {}; isDir={}", + sourceEtag, isDir); + } + } catch (AbfsRestOperationException e) { + throw new AbfsRestOperationException(e.getStatusCode(), + SOURCE_PATH_NOT_FOUND.getErrorCode(), + e.getMessage(), e); + } + + } + String encodedRenameSource = urlEncode( FORWARD_SLASH + this.getFileSystem() + source); if (getAuthType() == AuthType.SAS) { @@ -714,7 +742,7 @@ public AbfsClientRenameResult renamePath( // recovery using client transaction id only if it is a retried request. if (op.isARetriedRequest() && clientTransactionId != null && SOURCE_PATH_NOT_FOUND.getErrorCode().equalsIgnoreCase( - op.getResult().getStorageErrorCode())) { + op.getResult().getStorageErrorCode())) { try { final AbfsHttpOperation abfsHttpOperation = getPathStatus(destination, false, @@ -724,13 +752,11 @@ public AbfsClientRenameResult renamePath( X_MS_CLIENT_TRANSACTION_ID))) { return new AbfsClientRenameResult( getSuccessOp(AbfsRestOperationType.RenamePath, - HTTP_METHOD_PUT, url, requestHeaders), true, + HTTP_METHOD_PUT, url, requestHeaders), true, isMetadataIncompleteState); } } catch (AzureBlobFileSystemException exception) { - throw new AbfsDriverException( - "Error in getPathStatus while recovering from rename failure.", - exception); + throw new AbfsDriverException(ERR_RENAME_RECOVERY, exception); } throw e; } @@ -739,7 +765,7 @@ public AbfsClientRenameResult renamePath( // rename operation's validity. If there is an existing destination path, it may be rejected // with an authorization error. Catching and throwing FileAlreadyExistsException instead. if (op.getResult().getStorageErrorCode() - .equals(UNAUTHORIZED_BLOB_OVERWRITE.getErrorCode())){ + .equals(UNAUTHORIZED_BLOB_OVERWRITE.getErrorCode())) { throw new FileAlreadyExistsException(ERR_FILE_ALREADY_EXISTS); } @@ -1638,22 +1664,4 @@ public String addClientTransactionIdToHeader(List requestHeaders } return clientTransactionId; } - - /** - * Get the dummy success operation. - * @param operationType type of the operation - * @param httpMethod http method - * @param url url to be used - * @param requestHeaders list of headers to be sent with the request - * @return success operation - * @throws AzureBlobFileSystemException if rest operation fails. - */ - private AbfsRestOperation getSuccessOp(final AbfsRestOperationType operationType, - final String httpMethod, final URL url, - final List requestHeaders) throws AzureBlobFileSystemException { - final AbfsRestOperation successOp = getAbfsRestOperation( - operationType, httpMethod, url, requestHeaders); - successOp.hardSetResult(HttpURLConnection.HTTP_OK); - return successOp; - } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java index 87c8869d6c61c..e75df046d8d6b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java @@ -69,5 +69,9 @@ public final class AbfsErrors { "FNS-Blob delete was not successful for path: "; public static final String ATOMIC_DIR_RENAME_RECOVERY_ON_GET_PATH_EXCEPTION = "Path had to be recovered from atomic rename operation."; + public static final String ERR_CREATE_RECOVERY = + "Error while recovering from create failure."; + public static final String ERR_RENAME_RECOVERY = + "Error while recovering from rename failure."; private AbfsErrors() {} } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index d546745404bf3..f59c4a8b9f27f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -91,6 +91,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_CLIENT_TRANSACTION_ID; import static org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil.mockAddClientTransactionIdToHeader; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_CREATE_RECOVERY; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -2084,7 +2085,7 @@ private String extractFileEtag(String fileName) throws IOException { * @throws Exception if any error occurs during the operation. */ @Test - public void createPathRetryIdempotency() throws Exception { + public void testCreatePathRetryIdempotency() throws Exception { Configuration configuration = new Configuration(getRawConfiguration()); configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); try (AzureBlobFileSystem fs = getFileSystem(configuration)) { @@ -2129,7 +2130,7 @@ public void createPathRetryIdempotency() throws Exception { * @throws Exception if any error occurs during test execution */ @Test - public void getClientTransactionIdAfterCreate() throws Exception { + public void testGetClientTransactionIdAfterCreate() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(true); final String[] clientTransactionId = new String[1]; @@ -2204,7 +2205,7 @@ public void testClientTransactionIdAfterTwoCreateCalls() throws Exception { * @throws Exception If an error occurs during the test setup or execution. */ @Test - public void failureInGetPathStatusDuringCreateRecovery() throws Exception { + public void testFailureInGetPathStatusDuringCreateRecovery() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(true); final String[] clientTransactionId = new String[1]; @@ -2230,7 +2231,7 @@ public void failureInGetPathStatusDuringCreateRecovery() throws Exception { Assertions.assertThat(errorMessage) .describedAs("getPathStatus should fail while recovering") - .contains("Error in getPathStatus while recovering from create failure."); + .contains(ERR_CREATE_RECOVERY); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index a0d5030ff774d..afb66c054fc49 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -87,6 +87,7 @@ import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil.mockAddClientTransactionIdToHeader; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_RENAME_RECOVERY; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; @@ -1663,7 +1664,7 @@ public void testRenameSrcDirDeleteEmitDeletionCountInClientRequestId() * @throws Exception if an error occurs during the file system operations or mocking */ @Test - public void renamePathRetryIdempotency() throws Exception { + public void testRenamePathRetryIdempotency() throws Exception { Configuration configuration = new Configuration(getRawConfiguration()); configuration.set(FS_AZURE_ENABLE_CLIENT_TRANSACTION_ID, "true"); try (AzureBlobFileSystem fs = getFileSystem()) { @@ -1718,7 +1719,7 @@ public void renamePathRetryIdempotency() throws Exception { * @throws Exception if any error occurs during test execution */ @Test - public void getClientTransactionIdAfterRename() throws Exception { + public void testGetClientTransactionIdAfterRename() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(false); AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy(fs.getAbfsClient()); @@ -1755,7 +1756,7 @@ public void getClientTransactionIdAfterRename() throws Exception { * @throws Exception If an error occurs during the test setup or execution. */ @Test - public void failureInGetPathStatusDuringRenameRecovery() throws Exception { + public void testFailureInGetPathStatusDuringRenameRecovery() throws Exception { try (AzureBlobFileSystem fs = getFileSystem()) { assumeRecoveryThroughClientTransactionID(false); AbfsDfsClient abfsDfsClient = (AbfsDfsClient) Mockito.spy(fs.getAbfsClient()); @@ -1787,7 +1788,7 @@ public void failureInGetPathStatusDuringRenameRecovery() throws Exception { Assertions.assertThat(errorMessage) .describedAs("getPathStatus should fail while recovering") - .contains("Error in getPathStatus while recovering from rename failure."); + .contains(ERR_RENAME_RECOVERY); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index cbd13673b3add..b040a4b12f927 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -78,7 +78,6 @@ public class TestAbfsRenameRetryRecovery extends AbstractAbfsIntegrationTest { public TestAbfsRenameRetryRecovery() throws Exception { isNamespaceEnabled = getConfiguration() .getBoolean(TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); - Assume.assumeFalse(getConfiguration().getIsClientTransactionIdEnabled()); } /** @@ -346,8 +345,17 @@ public void testRenameRecoveryFailsForDirFsLevel() throws Exception { Long renamePathAttemptsBeforeRename = lookupCounterStatistic(ioStats, RENAME_PATH_ATTEMPTS.getStatName()); // source eTag does not match -> rename should be a failure + int newConnections; boolean renameResult = fs.rename(path1, path2); - assertEquals(false, renameResult); + if (getConfiguration().getIsClientTransactionIdEnabled()) { + // Recovery based on client transaction id should be successful + assertTrue(renameResult); + // One extra getPathStatus call should have happened + newConnections = 5; + } else { + assertFalse(renameResult); + newConnections = 4; + } // validating stat counters after rename // 3 calls should have happened in total for rename @@ -356,7 +364,7 @@ public void testRenameRecoveryFailsForDirFsLevel() throws Exception { // last getPathStatus call should be skipped assertThatStatisticCounter(ioStats, CONNECTIONS_MADE.getStatName()) - .isEqualTo(4 + connMadeBeforeRename); + .isEqualTo(newConnections + connMadeBeforeRename); // the RENAME_PATH_ATTEMPTS stat should be incremented by 1 // retries happen internally within AbfsRestOperation execute() @@ -397,11 +405,16 @@ public void testDirRenameRecoveryUnsupported() throws Exception { fs.mkdirs(new Path(path1)); - // source eTag does not match -> throw exception - expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> - spyClient.renamePath(path1, path2, null, - testTracingContext, null, - false))); + if (getConfiguration().getIsClientTransactionIdEnabled()) { + // Recovery based on client transaction id should be successful + assertTrue(fs.rename(new Path(path1), new Path(path2))); + } else { + // source eTag does not match -> throw exception + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, + testTracingContext, null, + false))); + } } /** @@ -540,11 +553,20 @@ public void testResilientCommitOperationTagMismatch() throws Throwable { final Path source = new Path(path1); touch(source); - final String sourceTag = ((EtagSource) fs.getFileStatus(source)).getEtag(); final ResilientCommitByRename commit = fs.createResilientCommitSupport(source); - intercept(FileNotFoundException.class, () -> - commit.commitSingleFileByRename(source, new Path(path2), "not the right tag")); + // When client transaction ID is enabled, the commit should succeed. + if (getConfiguration().getIsClientTransactionIdEnabled()) { + Pair response = commit.commitSingleFileByRename(source, new Path(path2), + "not the right tag"); + Assertions.assertThat(response.getKey()) + .describedAs("Recovery using client transaction ID") + .isTrue(); + } else { + intercept(FileNotFoundException.class, () -> + commit.commitSingleFileByRename(source, new Path(path2), + "not the right tag")); + } } /** From de8032aef5d08b314fa207c44a26d49050f92730 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 25 Feb 2025 05:09:19 -0800 Subject: [PATCH 14/15] Use getSuccessOp in necessary places --- .../fs/azurebfs/services/AbfsBlobClient.java | 15 ++++----------- .../hadoop/fs/azurebfs/services/AbfsClient.java | 9 +++------ .../fs/azurebfs/AbstractAbfsIntegrationTest.java | 9 ++++++--- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index b54ce1a4dac7e..df592a1495438 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -814,10 +814,9 @@ destination, sourceEtag, isAtomicRenameKey(source), tracingContext final URL url = createRequestUrl(destination, abfsUriQueryBuilder.toString()); final List requestHeaders = createDefaultHeaders(); - final AbfsRestOperation successOp = getAbfsRestOperation( + final AbfsRestOperation successOp = getSuccessOp( AbfsRestOperationType.RenamePath, HTTP_METHOD_PUT, url, requestHeaders); - successOp.hardSetResult(HTTP_OK); return new AbfsClientRenameResult(successOp, true, false); } else { throw new AbfsRestOperationException(HTTP_INTERNAL_ERROR, @@ -1208,11 +1207,8 @@ public AbfsRestOperation getPathStatus(final String path, if (op.getResult().getStatusCode() == HTTP_NOT_FOUND && isImplicitCheckRequired && isNonEmptyDirectory(path, tracingContext)) { // Implicit path found. - AbfsRestOperation successOp = getAbfsRestOperation( - AbfsRestOperationType.GetPathStatus, + return getSuccessOp(AbfsRestOperationType.GetPathStatus, HTTP_METHOD_HEAD, url, requestHeaders); - successOp.hardSetGetFileStatusResult(HTTP_OK); - return successOp; } if (op.getResult().getStatusCode() == HTTP_NOT_FOUND) { /* @@ -1308,11 +1304,8 @@ public AbfsRestOperation deletePath(final String path, = createDefaultUriQueryBuilder(); final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final List requestHeaders = createDefaultHeaders(); - final AbfsRestOperation successOp = getAbfsRestOperation( - AbfsRestOperationType.DeletePath, HTTP_METHOD_DELETE, - url, requestHeaders); - successOp.hardSetResult(HTTP_OK); - return successOp; + return getSuccessOp(AbfsRestOperationType.DeletePath, + HTTP_METHOD_DELETE, url, requestHeaders); } else { throw new AbfsRestOperationException(HTTP_INTERNAL_ERROR, AzureServiceErrorCode.UNKNOWN.getErrorCode(), diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 3e2ef4880613b..58366941331c7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -943,14 +943,12 @@ public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) { && DEFAULT_DELETE_CONSIDERED_IDEMPOTENT) { // Server has returned HTTP 404, which means path no longer // exists. Assuming delete result to be idempotent, return success. - final AbfsRestOperation successOp = getAbfsRestOperation( + LOG.debug("Returning success response from delete idempotency logic"); + return getSuccessOp( AbfsRestOperationType.DeletePath, HTTP_METHOD_DELETE, op.getUrl(), op.getRequestHeaders()); - successOp.hardSetResult(HttpURLConnection.HTTP_OK); - LOG.debug("Returning success response from delete idempotency logic"); - return successOp; } return op; @@ -1746,11 +1744,10 @@ public abstract Hashtable getXMSProperties(AbfsHttpOperation res * @param url url to be used * @param requestHeaders list of headers to be sent with the request * @return success operation - * @throws AzureBlobFileSystemException if rest operation fails. */ protected AbfsRestOperation getSuccessOp(final AbfsRestOperationType operationType, final String httpMethod, final URL url, - final List requestHeaders) throws AzureBlobFileSystemException { + final List requestHeaders) { final AbfsRestOperation successOp = getAbfsRestOperation( operationType, httpMethod, url, requestHeaders); successOp.hardSetResult(HttpURLConnection.HTTP_OK); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 3372cadefe1a9..e31df5eec65bb 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -745,16 +745,19 @@ protected void checkFuturesForExceptions(List> futures, int exceptionV protected void assumeRecoveryThroughClientTransactionID(boolean isCreate) throws IOException { // Assumes that recovery through client transaction ID is enabled. - Assume.assumeTrue(getConfiguration().getIsClientTransactionIdEnabled()); + Assume.assumeTrue("Recovery through client transaction ID is not enabled", + getConfiguration().getIsClientTransactionIdEnabled()); // Assumes that service type is DFS. assumeDfsServiceType(); // Assumes that namespace is enabled for the given AzureBlobFileSystem. assumeHnsEnabled(); if (isCreate) { // Assume that create client is DFS client. - Assume.assumeTrue(AbfsServiceType.DFS.equals(getIngressServiceType())); + Assume.assumeTrue("Ingress service type is not DFS", + AbfsServiceType.DFS.equals(getIngressServiceType())); // Assume that append blob is not enabled in DFS client. - Assume.assumeFalse(isAppendBlobEnabled()); + Assume.assumeFalse("Append blob is enabled in DFS client", + isAppendBlobEnabled()); } } } From 7c930585881eb4a86946d2f3bc5b4620b75beb0b Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 25 Feb 2025 05:19:05 -0800 Subject: [PATCH 15/15] Implicit path changes --- .../apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index df592a1495438..0065cee7dcb36 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1207,8 +1207,11 @@ public AbfsRestOperation getPathStatus(final String path, if (op.getResult().getStatusCode() == HTTP_NOT_FOUND && isImplicitCheckRequired && isNonEmptyDirectory(path, tracingContext)) { // Implicit path found. - return getSuccessOp(AbfsRestOperationType.GetPathStatus, - HTTP_METHOD_HEAD, url, requestHeaders); + AbfsRestOperation successOp = getSuccessOp( + AbfsRestOperationType.GetPathStatus, HTTP_METHOD_HEAD, + url, requestHeaders); + successOp.hardSetGetFileStatusResult(HTTP_OK); + return successOp; } if (op.getResult().getStatusCode() == HTTP_NOT_FOUND) { /*