Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1528,10 +1528,31 @@ public AbfsRestOperation deleteBlobPath(final Path blobPath,
*/
@Override
public boolean checkIsDir(AbfsHttpOperation result) {
String resourceType = result.getResponseHeader(X_MS_META_HDI_ISFOLDER);
String dirHeaderName = getHeaderNameIgnoreCase(result, X_MS_META_HDI_ISFOLDER);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't it be better to directly get the value from this function? Something like:
getCaseInsensitiveResponseHeader() similar to getResponseHeader
And this can be moved to AbfsHttpOperation class itself where normal version of this method already exists.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can be later used by other classes and other headers as well not only BlobClient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taken!

// If the header is not found, return false (not a directory)
if (dirHeaderName == null) {
return false;
}

String resourceType = result.getResponseHeader(dirHeaderName);
return resourceType != null && resourceType.equals(TRUE);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since getResponseHeader method returns either empty string or true- should we check for resourceType != EMPTY_STRING instead of null?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case of an empty string, resourceType.equals(TRUE) will return false, so there’s no need for an explicit check for an empty string.
The null case occurs when the header we are looking for does not exist, which is why this check is needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes since we already checked for null in the getHeaderNameIgnoreCase method, this repeated check may not be needed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it was an existing line of code, I didn’t change it. However, it does make sense to remove this null check.

}

/**
* Get the header name case-insensitively.
* @param result executed rest operation containing response from server.
* @param header The header to be checked.
* @return Header if found, null otherwise.
*/
private String getHeaderNameIgnoreCase(AbfsHttpOperation result, String header) {
Map<String, List<String>> responseHeaders = result.getResponseHeaders();
// Search for the key case-insensitively in the headers
return responseHeaders.keySet().stream()
.filter(key -> key != null && key.equalsIgnoreCase(header))
.findFirst()
.orElse(null); // Return null if no match is found
}

/**
* Returns true if the status code lies in the range of user error.
* In the case of HTTP_CONFLICT for PutBlockList we fall back to DFS and hence
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1601,7 +1601,8 @@ AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType
* @param requestHeaders The list of HTTP headers for the request.
* @return An AbfsRestOperation instance.
*/
AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType,
@VisibleForTesting
public AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType,
final String httpMethod,
final URL url,
final List<AbfsHttpHeader> requestHeaders) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.net.URL;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -776,15 +777,16 @@ public AbfsHttpOperationWithFixedResultForGetFileStatus(final URL url,
@Override
public String getResponseHeader(final String httpHeader) {
// Directories on FNS-Blob are identified by a special metadata header.
if (httpHeader.equals(X_MS_META_HDI_ISFOLDER)) {
if (httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER)) {
return TRUE;
}
return EMPTY_STRING;
}

@Override
public Map<String, List<String>> getResponseHeaders() {
return new HashMap<>();
return Collections.singletonMap(X_MS_META_HDI_ISFOLDER,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we adding this entry in the map for all getResponse header calls ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AbfsHttpOperationWithFixedResultForGetFileStatus is used in one place where we are checking for an implicit directory. In the case of an implicit directory, I am applying the same logic as in the method httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER), where we return true if the header is X_MS_META_HDI_ISFOLDER.

Collections.singletonList(TRUE));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@

package org.apache.hadoop.fs.azurebfs;

import java.net.URL;
import java.util.List;
import java.util.UUID;

import org.assertj.core.api.Assertions;
import org.junit.Assume;
import org.junit.Test;
import org.mockito.Mockito;
Expand All @@ -28,14 +31,25 @@
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType;

import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_FS_AZURE_ENABLE_MKDIR_OVERWRITE;
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.eq;

/**
* Test mkdir operation.
Expand Down Expand Up @@ -167,4 +181,107 @@ public void testMkdirWithExistingFilename() throws Exception {
intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath")));
intercept(FileAlreadyExistsException.class, () -> fs.mkdirs(new Path("/testFilePath/newDir")));
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add tests for this change in mkdir test file?
To me it looks like more of a metadata change.
We should add tests in ITestFileStatus, ITestListStatus, ITestAttributes classes.

All of them have dependency on this header to return right response.
Listing output also depends on this header is that also case-insensitive? Can we add some tests for that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for adding these test cases here is we are performing mkdir calls with different cases of Hdi_isfolder. Therefore, the focus is on how mkdir behaves in these scenarios.
But yes, it makes sense it is a metadata change so we can move it to other class if that seems more appropriate to you.

/**
* Test mkdirs with HDI folder configuration,
* verifying the correct header and directory state.
*/
@Test
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add test for other variations such as ALL Caps, only one later caps, all small. Also one test can be when we set multiple metadata keys, the directory one is correctly identified.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, will add the test cases.

public void testMkdirsWithDifferentCaseHDIConfig() throws Exception {
try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
assumeBlobServiceType();
AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
String configName = X_MS_METADATA_PREFIX + "Hdi_isfolder";
// Mock the operation to modify the headers
mockAbfsRestOperation(abfsBlobClient, configName);

// Create the path and invoke mkdirs method
Path path = new Path("/testPath");
fs.mkdirs(path);

// Assert that the response header has the updated value
AbfsHttpOperation op = abfsBlobClient.getPathStatus(path.toUri().getPath(),
true, getTestTracingContext(fs, true),
null).getResult();

// Verify the header and directory state
Assertions.assertThat(op.getResponseHeader(configName))
.describedAs("Header should be set to true")
.isEqualTo(TRUE);
Assertions.assertThat(abfsBlobClient.checkIsDir(op))
.describedAs("Directory should be marked as true")
.isTrue();
}
}

/**
* Test mkdirs with wrong HDI folder configuration,
* verifying the correct header and directory state.
*/
@Test
public void testMkdirsWithWrongHDIConfig() throws Exception {
try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
assumeBlobServiceType();
AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
String configName = X_MS_METADATA_PREFIX + "Hdi_isfolder1";

// Mock the operation to modify the headers
mockAbfsRestOperation(abfsBlobClient, configName);

// Create the path and invoke mkdirs method
Path path = new Path("/testPath");
fs.mkdirs(path);

// Assert the header and directory state
AbfsHttpOperation op = abfsBlobClient.getPathStatus(path.toUri().getPath(),
true, getTestTracingContext(fs, true),
null).getResult();

// Verify the header and directory state
Assertions.assertThat(op.getResponseHeader(configName))
.describedAs("Header should be set to TRUE")
.isEqualTo(TRUE);
Assertions.assertThat(abfsBlobClient.checkIsDir(op))
.describedAs("No Directory config set, should be marked as false")
.isFalse();
}
}

/**
* Helper method to mock the AbfsRestOperation and modify the request headers.
*
* @param abfsBlobClient the mocked AbfsBlobClient
* @param newHeader the header to add in place of the old one
*/
private void mockAbfsRestOperation(AbfsBlobClient abfsBlobClient, String newHeader) {
Mockito.doAnswer(invocation -> {
List<AbfsHttpHeader> requestHeaders = invocation.getArgument(3);

// Remove the actual HDI config header and add the new one
requestHeaders.removeIf(header ->
HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER.equals(header.getName()));
requestHeaders.add(new AbfsHttpHeader(newHeader, TRUE));

// Call the real method
return invocation.callRealMethod();
}).when(abfsBlobClient).getAbfsRestOperation(eq(AbfsRestOperationType.PutBlob),
eq(HTTP_METHOD_PUT), any(URL.class), anyList());
}

/**
* Helper method to mock the AbfsBlobClient and set up the client handler.
*
* @param fs the AzureBlobFileSystem instance
* @return the mocked AbfsBlobClient
*/
private AbfsBlobClient mockIngressClientHandler(AzureBlobFileSystem fs) {
AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler());
AbfsBlobClient abfsBlobClient = (AbfsBlobClient) Mockito.spy(
clientHandler.getClient());
fs.getAbfsStore().setClient(abfsBlobClient);
fs.getAbfsStore().setClientHandler(clientHandler);
Mockito.doReturn(abfsBlobClient).when(clientHandler).getIngressClient();
return abfsBlobClient;
}
}