From 55dbf2256f068ed9416170bb1c97104251b54588 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Sat, 5 Apr 2025 00:04:43 -0700 Subject: [PATCH 01/10] Read the stream in AbfsHttpOperation Itself --- .../services/AbfsAHCHttpOperation.java | 28 ++++++++++++++----- .../azurebfs/services/AbfsHttpOperation.java | 22 +++++++++++++-- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java index 2569c6a4bd3d0..cc460a24af6c3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java @@ -38,6 +38,7 @@ import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; +import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; import org.apache.http.client.methods.HttpGet; @@ -47,6 +48,7 @@ import org.apache.http.client.methods.HttpPut; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.util.EntityUtils; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APACHE_IMPL; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; @@ -192,14 +194,26 @@ String getConnResponseMessage() throws IOException { public void processResponse(final byte[] buffer, final int offset, final int length) throws IOException { - if (!isPayloadRequest) { - prepareRequest(); - LOG.debug("Sending request: {}", httpRequestBase); - httpResponse = executeRequest(); - LOG.debug("Request sent: {}; response {}", httpRequestBase, - httpResponse); + try { + if (!isPayloadRequest) { + prepareRequest(); + LOG.debug("Sending request: {}", httpRequestBase); + httpResponse = executeRequest(); + LOG.debug("Request sent: {}; response {}", httpRequestBase, + httpResponse); + } + parseResponseHeaderAndBody(buffer, offset, length); + } finally { + if (httpResponse != null) { + try { + EntityUtils.consume(httpResponse.getEntity()); + } finally { + if (httpResponse instanceof CloseableHttpResponse) { + ((CloseableHttpResponse) httpResponse).close(); + } + } + } } - parseResponseHeaderAndBody(buffer, offset, length); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 89c97b68baa69..14cef92e53a82 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; @@ -396,8 +398,7 @@ final void parseResponse(final byte[] buffer, // consume the input stream to release resources int totalBytesRead = 0; - try { - InputStream stream = getContentInputStream(); + try (InputStream stream = getContentInputStream()) { if 
(isNullInputStream(stream)) { return; } @@ -409,7 +410,7 @@ final void parseResponse(final byte[] buffer, if (url.toString().contains(QUERY_PARAM_COMP + EQUAL + BLOCKLIST)) { parseBlockListResponse(stream); } else { - listResultStream = stream; + parseListPathResponse(stream); } } else { if (buffer != null) { @@ -500,6 +501,21 @@ private void parseBlockListResponse(final InputStream stream) throws IOException blockIdList = client.parseBlockListResponse(stream); } + private void parseListPathResponse(final InputStream stream) throws IOException { + if (stream == null || blockIdList != null) { + return; + } + try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) { + byte[] tempBuffer = new byte[CLEAN_UP_BUFFER_SIZE]; + int bytesRead; + while ((bytesRead = stream.read(tempBuffer, 0, CLEAN_UP_BUFFER_SIZE)) != -1) { + buffer.write(tempBuffer, 0, bytesRead); + } + byte[] responseData = buffer.toByteArray(); + listResultStream = new ByteArrayInputStream(responseData); + } + } + public List getBlockIdList() { return blockIdList; } From fde7917c30e1add78133d7bcd24970c2cf733afa Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Sat, 5 Apr 2025 00:52:17 -0700 Subject: [PATCH 02/10] Improved Exception Handling --- .../fs/azurebfs/services/AbfsBlobClient.java | 22 +++++++++---------- .../fs/azurebfs/services/AbfsDfsClient.java | 15 +++++++++---- .../azurebfs/services/AbfsHttpOperation.java | 7 ++++++ 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 32eba86774a8c..ba51e2f897867 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1620,19 +1620,17 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) LOG.debug("ListBlobs listed {} blobs with {} as continuation token", listResultSchema.paths().size(), listResultSchema.getNextMarker()); - } catch (SAXException | IOException e) { - throw new AbfsDriverException(e); + return filterDuplicateEntriesAndRenamePendingFiles(listResultSchema, uri); + } catch (SAXException | IOException ex) { + throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex); } - } catch (IOException e) { - LOG.error("Unable to deserialize list results for uri {}", uri.toString(), e); - throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); - } - - try { - return filterDuplicateEntriesAndRenamePendingFiles(listResultSchema, uri); - } catch (IOException e) { - LOG.error("Unable to filter list results for uri {}", uri.toString(), e); - throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, e); + } catch (AbfsDriverException ex) { + // Throw as it is to avoid multiple wrapping. + LOG.error("Unable to deserialize list results for Uri {}", uri != null ? uri.toString(): "NULL", ex); + throw ex; + } catch (IOException ex) { + LOG.error("Unable to get stream for list results for uri {}", uri != null ? 
uri.toString(): "NULL", ex); + throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index ef4194179dcb0..4a62f5f01bc29 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -38,9 +38,12 @@ import java.util.UUID; import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; +import org.xml.sax.SAXException; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.FileStatus; @@ -1487,8 +1490,8 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) LOG.debug("ListPath listed {} paths with {} as continuation token", listResultSchema.paths().size(), getContinuationFromResponse(result)); - } catch (IOException ex) { - throw new AbfsDriverException(ex); + } catch (JsonParseException | JsonMappingException ex) { + throw new AbfsDriverException(ERR_DFS_LIST_PARSING, ex); } List fileStatuses = new ArrayList<>(); @@ -1501,9 +1504,13 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) listResponseData.setContinuationToken( getContinuationFromResponse(result)); return listResponseData; + } catch (AbfsDriverException ex) { + // Throw as it is to avoid multiple wrapping. + LOG.error("Unable to deserialize list results for Uri {}", uri != null ? uri.toString(): "NULL", ex); + throw ex; } catch (IOException ex) { - LOG.error("Unable to deserialize list results for Uri {}", uri.toString(), ex); - throw new AbfsDriverException(ERR_DFS_LIST_PARSING, ex); + LOG.error("Unable to deserialize list results for Uri {}", uri != null ? 
uri.toString(): "NULL", ex); + throw new AbfsDriverException(ex); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 14cef92e53a82..21a5eeb05e640 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -30,8 +30,10 @@ import java.util.List; import java.util.Map; +import com.fasterxml.jackson.databind.JsonMappingException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; @@ -439,6 +441,11 @@ final void parseResponse(final byte[] buffer, method, getMaskedUrl(), ex.getMessage()); log.debug("IO Error: ", ex); throw ex; + } catch (Exception ex) { + log.warn("Unexpected error: {} {}: {}", + method, getMaskedUrl(), ex.getMessage()); + log.debug("Unexpected Error: ", ex); + throw new IOException(ex); } finally { this.recvResponseTimeMs += elapsedTimeMs(startTime); this.bytesReceived = totalBytesRead; From e3f272aaa285701ddea68e9fca0364940811e166 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Sat, 5 Apr 2025 01:30:40 -0700 Subject: [PATCH 03/10] Further Changes --- .../org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java | 1 - .../apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java | 2 -- 2 files changed, 3 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 4a62f5f01bc29..058576d92712a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -43,7 +43,6 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; -import org.xml.sax.SAXException; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.FileStatus; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 21a5eeb05e640..bccde967a1306 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -30,10 +30,8 @@ import java.util.List; import java.util.Map; -import com.fasterxml.jackson.databind.JsonMappingException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; From f965d4bc407a5b07a934e694918c9af5fe2fef74 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Sun, 6 Apr 2025 21:30:19 -0700 Subject: [PATCH 04/10] Fixing Metric Related Tests --- .../fs/azurebfs/services/ITestApacheClientConnectionPool.java | 2 ++ .../org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java | 
1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java index 4c4a748b72ead..9ff37332ad702 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java @@ -45,6 +45,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; @@ -67,6 +68,7 @@ public ITestApacheClientConnectionPool() throws Exception { public void testKacIsClosed() throws Throwable { Configuration configuration = new Configuration(getRawConfiguration()); configuration.set(FS_AZURE_NETWORKING_LIBRARY, APACHE_HTTP_CLIENT.name()); + configuration.unset(FS_AZURE_METRIC_FORMAT); try (AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( configuration)) { KeepAliveCache kac = fs.getAbfsStore().getClientHandler().getIngressClient() diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java index 2495e50f43475..274230e4b382e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java @@ -57,6 +57,7 @@ public class TestAbfsClient { public void testTimerInitializationWithoutMetricCollection() throws Exception { final Configuration configuration = new Configuration(); AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, ACCOUNT_NAME); + abfsConfiguration.unset(FS_AZURE_METRIC_FORMAT); AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); From 2689733f590535af4a627e7018b6dc8d7c53ea1a Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Sun, 6 Apr 2025 22:30:54 -0700 Subject: [PATCH 05/10] Code Refactoring --- .../hadoop/fs/azurebfs/services/AbfsBlobClient.java | 6 ++---- .../hadoop/fs/azurebfs/services/AbfsDfsClient.java | 3 ++- .../hadoop/fs/azurebfs/services/AbfsHttpOperation.java | 10 ++++------ 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index ba51e2f897867..ff407c55b24b0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -22,6 +22,7 @@ import 
javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -1606,10 +1607,7 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { BlobListResultSchema listResultSchema; - try (InputStream stream = result.getListResultStream()) { - if (stream == null) { - return null; - } + try (InputStream stream = new ByteArrayInputStream(result.getListResultData())) { try { final SAXParser saxParser = saxParserThreadLocal.get(); saxParser.reset(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 058576d92712a..e5b8b02d1e7a0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -1479,7 +1480,7 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) */ @Override public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { - try (InputStream listResultInputStream = result.getListResultStream()) { + try (InputStream listResultInputStream = new ByteArrayInputStream(result.getListResultData())) { DfsListResultSchema listResultSchema; try { final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index bccde967a1306..eda4d270ca697 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.azurebfs.services; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; @@ -77,7 +76,7 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable { private String requestId = ""; private String expectedAppendPos = ""; private ListResultSchema listResultSchema = null; - private InputStream listResultStream = null; + private byte[] listResultData = null; private List blockIdList = null; // metrics @@ -223,8 +222,8 @@ public ListResultSchema getListResultSchema() { return listResultSchema; } - public final InputStream getListResultStream() { - return listResultStream; + public final byte[] getListResultData() { + return listResultData; } /** @@ -516,8 +515,7 @@ private void parseListPathResponse(final InputStream stream) throws IOException while ((bytesRead = stream.read(tempBuffer, 0, CLEAN_UP_BUFFER_SIZE)) != -1) { buffer.write(tempBuffer, 0, bytesRead); } - byte[] responseData = buffer.toByteArray(); - listResultStream = new ByteArrayInputStream(responseData); + listResultData = buffer.toByteArray(); } } From 
60f1c38ac0d8bb2f70cdee0e53fbd6c25b912389 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 7 Apr 2025 02:40:14 -0700 Subject: [PATCH 06/10] Code Checks --- .../fs/azurebfs/services/AbfsBlobClient.java | 2 +- .../fs/azurebfs/services/AbfsDfsClient.java | 31 +++++++++---------- .../azurebfs/services/AbfsHttpOperation.java | 21 ++++++++----- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index ff407c55b24b0..b39b8a285ddc4 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1606,9 +1606,9 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) @Override public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { - BlobListResultSchema listResultSchema; try (InputStream stream = new ByteArrayInputStream(result.getListResultData())) { try { + BlobListResultSchema listResultSchema; final SAXParser saxParser = saxParserThreadLocal.get(); saxParser.reset(); listResultSchema = new BlobListResultSchema(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index e5b8b02d1e7a0..de608c3e6b3ef 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -1479,31 +1479,30 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) * @throws AzureBlobFileSystemException if parsing fails. 
*/ @Override - public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { - try (InputStream listResultInputStream = new ByteArrayInputStream(result.getListResultData())) { - DfsListResultSchema listResultSchema; + public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) + throws AzureBlobFileSystemException { + try (InputStream stream = new ByteArrayInputStream(result.getListResultData())) { try { + DfsListResultSchema listResultSchema; final ObjectMapper objectMapper = new ObjectMapper(); - listResultSchema = objectMapper.readValue(listResultInputStream, - DfsListResultSchema.class); + listResultSchema = objectMapper.readValue(stream, DfsListResultSchema.class); result.setListResultSchema(listResultSchema); LOG.debug("ListPath listed {} paths with {} as continuation token", listResultSchema.paths().size(), getContinuationFromResponse(result)); + List fileStatuses = new ArrayList<>(); + for (DfsListResultEntrySchema entry : listResultSchema.paths()) { + fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); + } + ListResponseData listResponseData = new ListResponseData(); + listResponseData.setFileStatusList(fileStatuses); + listResponseData.setRenamePendingJsonPaths(null); + listResponseData.setContinuationToken( + getContinuationFromResponse(result)); + return listResponseData; } catch (JsonParseException | JsonMappingException ex) { throw new AbfsDriverException(ERR_DFS_LIST_PARSING, ex); } - - List fileStatuses = new ArrayList<>(); - for (DfsListResultEntrySchema entry : listResultSchema.paths()) { - fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri)); - } - ListResponseData listResponseData = new ListResponseData(); - listResponseData.setFileStatusList(fileStatuses); - listResponseData.setRenamePendingJsonPaths(null); - listResponseData.setContinuationToken( - getContinuationFromResponse(result)); - return listResponseData; } catch (AbfsDriverException ex) { // Throw as it is to avoid multiple wrapping. LOG.error("Unable to deserialize list results for Uri {}", uri != null ? uri.toString(): "NULL", ex); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index eda4d270ca697..38d7666eab0b0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -226,6 +226,18 @@ public final byte[] getListResultData() { return listResultData; } + @VisibleForTesting + public byte[] readDataFromStream(InputStream stream) throws IOException{ + try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) { + byte[] tempBuffer = new byte[CLEAN_UP_BUFFER_SIZE]; + int bytesRead; + while ((bytesRead = stream.read(tempBuffer, 0, CLEAN_UP_BUFFER_SIZE)) != -1) { + buffer.write(tempBuffer, 0, bytesRead); + } + return buffer.toByteArray(); + } + } + /** * Get response header value for the given headerKey. 
* @@ -509,14 +521,7 @@ private void parseListPathResponse(final InputStream stream) throws IOException if (stream == null || blockIdList != null) { return; } - try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) { - byte[] tempBuffer = new byte[CLEAN_UP_BUFFER_SIZE]; - int bytesRead; - while ((bytesRead = stream.read(tempBuffer, 0, CLEAN_UP_BUFFER_SIZE)) != -1) { - buffer.write(tempBuffer, 0, bytesRead); - } - listResultData = buffer.toByteArray(); - } + listResultData = readDataFromStream(stream); } public List getBlockIdList() { From f968baa3e0332bf5eb8b8c5e7b08e6efa2e3803a Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 7 Apr 2025 04:57:06 -0700 Subject: [PATCH 07/10] Resolving Comments --- .../fs/azurebfs/services/AbfsBlobClient.java | 5 +-- .../fs/azurebfs/services/AbfsDfsClient.java | 4 +-- .../azurebfs/services/AbfsHttpOperation.java | 7 +++- .../ITestAzureBlobFileSystemListStatus.java | 32 +++++++++++++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index b39b8a285ddc4..6890b818eea64 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1626,7 +1626,7 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) // Throw as it is to avoid multiple wrapping. LOG.error("Unable to deserialize list results for Uri {}", uri != null ? uri.toString(): "NULL", ex); throw ex; - } catch (IOException ex) { + } catch (Exception ex) { LOG.error("Unable to get stream for list results for uri {}", uri != null ? uri.toString(): "NULL", ex); throw new AbfsDriverException(ERR_BLOB_LIST_PARSING, ex); } @@ -1926,7 +1926,8 @@ private List getMetadataHeadersList(final Hashtable fileStatuses = new ArrayList<>(); Map renamePendingJsonPaths = new HashMap<>(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index de608c3e6b3ef..12ded7567c97e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -1507,9 +1507,9 @@ public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) // Throw as it is to avoid multiple wrapping. LOG.error("Unable to deserialize list results for Uri {}", uri != null ? uri.toString(): "NULL", ex); throw ex; - } catch (IOException ex) { + } catch (Exception ex) { LOG.error("Unable to deserialize list results for Uri {}", uri != null ? 
uri.toString(): "NULL", ex); - throw new AbfsDriverException(ex); + throw new AbfsDriverException(ERR_DFS_LIST_PARSING, ex); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 38d7666eab0b0..43a5a6d49c66f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -517,8 +517,13 @@ private void parseBlockListResponse(final InputStream stream) throws IOException blockIdList = client.parseBlockListResponse(stream); } + /** + * Parse the list path response from the network stream and save response into a buffer. + * @param stream Network InputStream. + * @throws IOException if an error occurs while reading the stream. + */ private void parseListPathResponse(final InputStream stream) throws IOException { - if (stream == null || blockIdList != null) { + if (stream == null || listResultData != null) { return; } listResultData = readDataFromStream(stream); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 7f7d96bd4773b..10a67092edcc0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -20,6 +20,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.net.SocketException; import java.net.SocketTimeoutException; import java.net.URL; import java.util.ArrayList; @@ -31,6 +32,7 @@ import org.assertj.core.api.Assertions; import org.junit.Test; +import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.stubbing.Stubber; @@ -40,8 +42,10 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; @@ -63,7 +67,10 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_BLOB_LIST_PARSING; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; +import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; import static 
org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; @@ -176,6 +183,31 @@ public void testListPathTracingContext() throws Exception { Mockito.verify(spiedTracingContext, times(0)).constructHeader(any(), any(), any()); } + @Test + public void testListPathParsingFailure() throws Exception { + assumeBlobServiceType(); + AzureBlobFileSystem spiedFs = Mockito.spy(getFileSystem()); + AzureBlobFileSystemStore spiedStore = Mockito.spy(spiedFs.getAbfsStore()); + AbfsBlobClient spiedClient = Mockito.spy(spiedStore.getClientHandler() + .getBlobClient()); + Mockito.doReturn(spiedStore).when(spiedFs).getAbfsStore(); + Mockito.doReturn(spiedClient).when(spiedStore).getClient(); + + Mockito.doThrow(new SocketException(CONNECTION_RESET_MESSAGE)).when(spiedClient).filterDuplicateEntriesAndRenamePendingFiles(any(), any()); + List fileStatuses = new ArrayList<>(); + AbfsDriverException ex = intercept(AbfsDriverException.class, + () -> { + spiedStore.listStatus(new Path("/"), "", fileStatuses, + true, null, getTestTracingContext(spiedFs, true)); + }); + Assertions.assertThat(ex.getStatusCode()) + .describedAs("Expecting Network Error status code") + .isEqualTo(-1); + Assertions.assertThat(ex.getErrorMessage()) + .describedAs("Expecting COPY_ABORTED error code") + .contains(ERR_BLOB_LIST_PARSING); + } + /** * Creates a file, verifies that listStatus returns it, * even while the file is still open for writing. From 5df635d94f8ec0b8c6ae44f3809a4b7954a37495 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 7 Apr 2025 06:05:09 -0700 Subject: [PATCH 08/10] Test Enhancements --- .../azurebfs/ITestAzureBlobFileSystemListStatus.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 10a67092edcc0..577b44cdb657b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -32,7 +32,6 @@ import org.assertj.core.api.Assertions; import org.junit.Test; -import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.stubbing.Stubber; @@ -42,7 +41,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; @@ -67,10 +65,10 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX; -import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_BLOB_LIST_PARSING; import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE; 
+import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; @@ -137,7 +135,9 @@ public Void call() throws Exception { /** * Test to verify that each paginated call to ListBlobs uses a new tracing context. - * @throws Exception + * Test also verifies that the retry policy is called when a SocketTimeoutException + * Test also verifies that empty list with valid continuation token is handled. + * @throws Exception if there is an error or test assertions fails. */ @Test public void testListPathTracingContext() throws Exception { @@ -167,6 +167,10 @@ public void testListPathTracingContext() throws Exception { List fileStatuses = new ArrayList<>(); spiedStore.listStatus(new Path("/"), "", fileStatuses, true, null, spiedTracingContext); + // Assert that there were retries due to SocketTimeoutException + Mockito.verify(spiedClient, Mockito.times(1)) + .getRetryPolicy(CONNECTION_TIMEOUT_ABBREVIATION); + // Assert that there were 2 paginated ListPath calls were made 1 and 2. // 1. Without continuation token Mockito.verify(spiedClient, times(1)).listPath( From 5b9dfa5ff9977cd1be60a332ff34ba60f25d123c Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 7 Apr 2025 07:41:37 -0700 Subject: [PATCH 09/10] PR Checks --- .../fs/azurebfs/services/AbfsBlobClient.java | 1 + .../azurebfs/services/AbfsHttpOperation.java | 21 +++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 6890b818eea64..bee2b1785228d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -1925,6 +1925,7 @@ private List getMetadataHeadersList(final Hashtable getBlockIdList() { From fde17834bcfe03986216a29655cd340e24ad5b10 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 7 Apr 2025 20:21:08 -0700 Subject: [PATCH 10/10] PR Checks --- .../hadoop/fs/azurebfs/services/AbfsBlobClient.java | 3 +-- .../hadoop/fs/azurebfs/services/AbfsDfsClient.java | 3 +-- .../fs/azurebfs/services/AbfsHttpOperation.java | 11 ++++++----- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index bee2b1785228d..c41df10d425ad 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -22,7 +22,6 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; -import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -1606,7 +1605,7 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) @Override public ListResponseData parseListPathResults(AbfsHttpOperation 
result, URI uri) throws AzureBlobFileSystemException { - try (InputStream stream = new ByteArrayInputStream(result.getListResultData())) { + try (InputStream stream = result.getListResultStream()) { try { BlobListResultSchema listResultSchema; final SAXParser saxParser = saxParserThreadLocal.get(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 12ded7567c97e..0e988162a0873 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.azurebfs.services; -import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -1481,7 +1480,7 @@ public Hashtable getXMSProperties(AbfsHttpOperation result) @Override public ListResponseData parseListPathResults(AbfsHttpOperation result, URI uri) throws AzureBlobFileSystemException { - try (InputStream stream = new ByteArrayInputStream(result.getListResultData())) { + try (InputStream stream = result.getListResultStream()) { try { DfsListResultSchema listResultSchema; final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 4b8afe96e8dd4..81e33c5c22254 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; @@ -76,7 +77,7 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable { private String requestId = ""; private String expectedAppendPos = ""; private ListResultSchema listResultSchema = null; - private byte[] listResultData = null; + private InputStream listResultStream = null; private List blockIdList = null; // metrics @@ -222,8 +223,8 @@ public ListResultSchema getListResultSchema() { return listResultSchema; } - public final byte[] getListResultData() { - return listResultData; + public InputStream getListResultStream() { + return listResultStream; } /** @@ -511,7 +512,7 @@ private void parseBlockListResponse(final InputStream stream) throws IOException * @throws IOException if an error occurs while reading the stream. */ private void parseListPathResponse(final InputStream stream) throws IOException { - if (stream == null || listResultData != null) { + if (stream == null || listResultStream != null) { return; } try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) { @@ -520,7 +521,7 @@ private void parseListPathResponse(final InputStream stream) throws IOException while ((bytesRead = stream.read(tempBuffer, 0, CLEAN_UP_BUFFER_SIZE)) != -1) { buffer.write(tempBuffer, 0, bytesRead); } - listResultData = buffer.toByteArray(); + listResultStream = new ByteArrayInputStream(buffer.toByteArray()); } }
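
Editor's sketch of the pattern this series converges on: AbfsHttpOperation drains the list response off the network stream into an in-memory buffer while the connection is still open, the Apache connection is consumed and closed in processResponse, and the clients later parse from an in-memory replay returned by getListResultStream(). The snippet below is a self-contained illustration of that buffer-then-parse idea only; the class, method names, and buffer size here are illustrative assumptions and are not part of the ABFS codebase.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;

    // Illustrative only: shows draining a network stream into memory so the
    // connection can be released before (possibly failing) parsing begins.
    public final class BufferedResponseExample {

      // Assumed chunk size; the real code uses its own constant.
      private static final int BUFFER_SIZE = 64 * 1024;

      private BufferedResponseExample() {
      }

      // Copy the whole network stream into a byte array.
      static byte[] readFully(InputStream networkStream) throws IOException {
        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
          byte[] chunk = new byte[BUFFER_SIZE];
          int read;
          while ((read = networkStream.read(chunk, 0, BUFFER_SIZE)) != -1) {
            out.write(chunk, 0, read);
          }
          return out.toByteArray();
        }
      }

      // Replay the buffered bytes as an InputStream for the parser; the
      // original connection can already be closed at this point.
      static InputStream replay(byte[] buffered) {
        return new ByteArrayInputStream(buffered);
      }
    }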