From fa8093e949cd03435df74386e6d3cd488d697423 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Thu, 28 May 2020 15:05:44 +0530 Subject: [PATCH 1/6] HADOOP-17022 Tune S3AFileSystem.listFiles() api. Perform list operations directly assuming the path to be a directory and then fallback to head checks for file. This optimization will reduce the number of remote calls to S3 when the input is a directory. This will also reduce the number of metastore calls in case of guarded store. --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 159 +++++++++++------- .../fs/s3a/ITestS3AFileOperationCost.java | 61 ++++++- 2 files changed, 157 insertions(+), 63 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 6d2b3a84ca702..a3f400ea26292 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4181,79 +4181,114 @@ private RemoteIterator innerListFiles( Path path = qualify(f); LOG.debug("listFiles({}, {})", path, recursive); try { - // if a status was given, that is used, otherwise - // call getFileStatus, which triggers an existence check - final S3AFileStatus fileStatus = status != null - ? status - : (S3AFileStatus) getFileStatus(path); - if (fileStatus.isFile()) { + // if a status was given and it is a file. + if (status != null && status.isFile()) { // simple case: File LOG.debug("Path is a file"); return new Listing.SingleStatusRemoteIterator( - toLocatedFileStatus(fileStatus)); - } else { - // directory: do a bulk operation - String key = maybeAddTrailingSlash(pathToKey(path)); - String delimiter = recursive ? null : "/"; - LOG.debug("Requesting all entries under {} with delimiter '{}'", - key, delimiter); - final RemoteIterator cachedFilesIterator; - final Set tombstones; - boolean allowAuthoritative = allowAuthoritative(f); - if (recursive) { - final PathMetadata pm = metadataStore.get(path, true); - // shouldn't need to check pm.isDeleted() because that will have - // been caught by getFileStatus above. - MetadataStoreListFilesIterator metadataStoreListFilesIterator = - new MetadataStoreListFilesIterator(metadataStore, pm, - allowAuthoritative); - tombstones = metadataStoreListFilesIterator.listTombstones(); - // if all of the below is true - // - authoritative access is allowed for this metadatastore for this directory, - // - all the directory listings are authoritative on the client - // - the caller does not force non-authoritative access - // return the listing without any further s3 access - if (!forceNonAuthoritativeMS && - allowAuthoritative && - metadataStoreListFilesIterator.isRecursivelyAuthoritative()) { - S3AFileStatus[] statuses = S3Guard.iteratorToStatuses( - metadataStoreListFilesIterator, tombstones); - cachedFilesIterator = listing.createProvidedFileStatusIterator( - statuses, ACCEPT_ALL, acceptor); - return listing.createLocatedFileStatusIterator(cachedFilesIterator); - } - cachedFilesIterator = metadataStoreListFilesIterator; - } else { - DirListingMetadata meta = - S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider, - allowAuthoritative); - if (meta != null) { - tombstones = meta.listTombstones(); - } else { - tombstones = null; - } - cachedFilesIterator = listing.createProvidedFileStatusIterator( - S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor); - if (allowAuthoritative && meta != null && meta.isAuthoritative()) { - // metadata listing is authoritative, so return it directly - return listing.createLocatedFileStatusIterator(cachedFilesIterator); - } + toLocatedFileStatus(status)); + } + // Assuming the path to be a directory + // do a bulk operation. + RemoteIterator listFilesAssumingDir = + getListFilesAssumingDir(path, + recursive, + acceptor, + collectTombstones, + forceNonAuthoritativeMS); + // If there are no list entries present, we + // fallback to file existence check as the path + // can be a file or empty directory. + if (!listFilesAssumingDir.hasNext()) { + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); + if (fileStatus.isFile()) { + return new Listing.SingleStatusRemoteIterator( + toLocatedFileStatus(fileStatus)); } - return listing.createTombstoneReconcilingIterator( - listing.createLocatedFileStatusIterator( - listing.createFileStatusListingIterator(path, - createListObjectsRequest(key, delimiter), - ACCEPT_ALL, - acceptor, - cachedFilesIterator)), - collectTombstones ? tombstones : null); } + // If we have reached here, it means either there are files + // in this directory or it is empty. + return listFilesAssumingDir; } catch (AmazonClientException e) { // TODO S3Guard: retry on file not found exception throw translateException("listFiles", path, e); } } + /** + * List files under a path assuming the path to be a directory. + * @param path input path. + * @param recursive recursive listing? + * @param acceptor file status filter + * @param collectTombstones should tombstones be collected from S3Guard? + * @param forceNonAuthoritativeMS forces metadata store to act like non + * authoritative. This is useful when + * listFiles output is used by import tool. + * @return an iterator over listing. + * @throws IOException any exception. + */ + private RemoteIterator getListFilesAssumingDir( + Path path, + boolean recursive, Listing.FileStatusAcceptor acceptor, + boolean collectTombstones, + boolean forceNonAuthoritativeMS) throws IOException { + + String key = maybeAddTrailingSlash(pathToKey(path)); + String delimiter = recursive ? null : "/"; + LOG.debug("Requesting all entries under {} with delimiter '{}'", + key, delimiter); + final RemoteIterator cachedFilesIterator; + final Set tombstones; + boolean allowAuthoritative = allowAuthoritative(path); + if (recursive) { + final PathMetadata pm = metadataStore.get(path, true); + // shouldn't need to check pm.isDeleted() because that will have + // been caught by getFileStatus above. + MetadataStoreListFilesIterator metadataStoreListFilesIterator = + new MetadataStoreListFilesIterator(metadataStore, pm, + allowAuthoritative); + tombstones = metadataStoreListFilesIterator.listTombstones(); + // if all of the below is true + // - authoritative access is allowed for this metadatastore for this directory, + // - all the directory listings are authoritative on the client + // - the caller does not force non-authoritative access + // return the listing without any further s3 access + if (!forceNonAuthoritativeMS && + allowAuthoritative && + metadataStoreListFilesIterator.isRecursivelyAuthoritative()) { + S3AFileStatus[] statuses = S3Guard.iteratorToStatuses( + metadataStoreListFilesIterator, tombstones); + cachedFilesIterator = listing.createProvidedFileStatusIterator( + statuses, ACCEPT_ALL, acceptor); + return listing.createLocatedFileStatusIterator(cachedFilesIterator); + } + cachedFilesIterator = metadataStoreListFilesIterator; + } else { + DirListingMetadata meta = + S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider, + allowAuthoritative); + if (meta != null) { + tombstones = meta.listTombstones(); + } else { + tombstones = null; + } + cachedFilesIterator = listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor); + if (allowAuthoritative && meta != null && meta.isAuthoritative()) { + // metadata listing is authoritative, so return it directly + return listing.createLocatedFileStatusIterator(cachedFilesIterator); + } + } + return listing.createTombstoneReconcilingIterator( + listing.createLocatedFileStatusIterator( + listing.createFileStatusListingIterator(path, + createListObjectsRequest(key, delimiter), + ACCEPT_ALL, + acceptor, + cachedFilesIterator)), + collectTombstones ? tombstones : null); + } + /** * Override superclass so as to add statistic collection. * {@inheritDoc} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index b2b983c4d4df7..158e0213503b2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -69,7 +69,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase { @Parameterized.Parameters(name = "{0}") public static Collection params() { return Arrays.asList(new Object[][]{ - {"raw", false}, +// {"raw", false}, {"guarded", true} }); } @@ -168,6 +168,65 @@ public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable { } } + @Test + public void testCostOfListFilesOnFile() throws Throwable { + describe("Performing listFiles() on a file"); + Path file = path(getMethodName() + ".txt"); + S3AFileSystem fs = getFileSystem(); + touch(fs, file); + resetMetricDiffs(); + fs.listFiles(file, true); + if (!fs.hasMetadataStore()) { + metadataRequests.assertDiffEquals(1); + } else { + if (fs.allowAuthoritative(file)) { + listRequests.assertDiffEquals(0); + } else { + listRequests.assertDiffEquals(1); + } + } + } + + @Test + public void testCostOfListFilesOnEmptyDir() throws Throwable { + describe("Performing listFiles() on an empty dir"); + Path dir = path(getMethodName()); + S3AFileSystem fs = getFileSystem(); + fs.mkdirs(dir); + resetMetricDiffs(); + fs.listFiles(dir, true); + if (!fs.hasMetadataStore()) { + verifyOperationCount(2, 1); + } else { + if (fs.allowAuthoritative(dir)) { + verifyOperationCount(0, 0); + } else { + verifyOperationCount(0, 1); + } + } + } + + @Test + public void testCostOfListFilesOnNonEmptyDir() throws Throwable { + describe("Performing listFiles() on a non empty dir"); + Path dir = path(getMethodName()); + S3AFileSystem fs = getFileSystem(); + fs.mkdirs(dir); + Path file = new Path(dir, "file.txt"); + touch(fs, file); + resetMetricDiffs(); + fs.listFiles(dir, true); + if (!fs.hasMetadataStore()) { + verifyOperationCount(0, 1); + } else { + if (fs.allowAuthoritative(dir)) { + verifyOperationCount(0, 0); + } else { + verifyOperationCount(0, 1); + } + } + } + @Test public void testCostOfGetFileStatusOnFile() throws Throwable { describe("performing getFileStatus on a file"); From 35ff50e5a05cae38b072521cf0c4c3985177ae19 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Wed, 10 Jun 2020 13:48:29 +0530 Subject: [PATCH 2/6] Adding listFiles() and listLocatedStatus() checks in ITestAssumeRole --- .../src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +++- .../org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java | 2 +- .../java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index a3f400ea26292..1e17fdba73031 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4200,7 +4200,9 @@ private RemoteIterator innerListFiles( // fallback to file existence check as the path // can be a file or empty directory. if (!listFilesAssumingDir.hasNext()) { - final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); + final S3AFileStatus fileStatus = innerGetFileStatus(path, + false, + StatusProbeEnum.HEAD_OR_DIR_MARKER); if (fileStatus.isFile()) { return new Listing.SingleStatusRemoteIterator( toLocatedFileStatus(fileStatus)); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index 158e0213503b2..fd1f3390013e5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -69,7 +69,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase { @Parameterized.Parameters(name = "{0}") public static Collection params() { return Arrays.asList(new Object[][]{ -// {"raw", false}, + {"raw", false}, {"guarded", true} }); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index cf935d28591ba..d10b62254c381 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -397,6 +397,10 @@ public void testAssumeRoleRestrictedPolicyFS() throws Exception { } forbidden("", () -> fs.listStatus(ROOT)); + forbidden("", + () -> fs.listFiles(ROOT, true)); + forbidden("", + () -> fs.listLocatedStatus(ROOT)); forbidden("", () -> fs.mkdirs(path("testAssumeRoleFS"))); } From 25ccf2d659437bfc52760bba63f06424122c00fc Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Thu, 11 Jun 2020 21:06:54 +0530 Subject: [PATCH 3/6] Adding test for operations cost of listFiles() non existing directory Observed no change from previous implemetation. --- .../hadoop/fs/s3a/ITestS3AFileOperationCost.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index fd1f3390013e5..60b126868d258 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -227,6 +227,17 @@ public void testCostOfListFilesOnNonEmptyDir() throws Throwable { } } + @Test + public void testCostOfListFilesOnNonExistingDir() throws Throwable { + describe("Performing listFiles() on a non existing dir"); + Path dir = path(getMethodName()); + S3AFileSystem fs = getFileSystem(); + resetMetricDiffs(); + intercept(FileNotFoundException.class, + () -> fs.listFiles(dir, true)); + verifyOperationCount(2, 1); + } + @Test public void testCostOfGetFileStatusOnFile() throws Throwable { describe("performing getFileStatus on a file"); From 36aec4d13e1f810c5a35bd4486652ce9bedcc644 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Tue, 23 Jun 2020 13:14:24 +0530 Subject: [PATCH 4/6] Fix for test failures. Regression --- .../java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 11 +++++++++-- .../hadoop/fs/s3a/ITestS3GuardListConsistency.java | 6 +++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 1e17fdba73031..be1d02a6180f7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4244,8 +4244,15 @@ private RemoteIterator getListFilesAssumingDir( boolean allowAuthoritative = allowAuthoritative(path); if (recursive) { final PathMetadata pm = metadataStore.get(path, true); - // shouldn't need to check pm.isDeleted() because that will have - // been caught by getFileStatus above. + if (pm != null) { + if (pm.isDeleted()) { + OffsetDateTime deletedAt = OffsetDateTime.ofInstant( + Instant.ofEpochMilli(pm.getFileStatus().getModificationTime()), + ZoneOffset.UTC); + throw new FileNotFoundException("Path " + path + " is recorded as " + + "deleted by S3Guard at " + deletedAt); + } + } MetadataStoreListFilesIterator metadataStoreListFilesIterator = new MetadataStoreListFilesIterator(metadataStore, pm, allowAuthoritative); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index 6e55796fd3ae5..3c67e252e6e69 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.contract.s3a.S3AContract; import com.google.common.collect.Lists; @@ -271,7 +272,10 @@ public void testConsistentRenameAfterDelete() throws Exception { assertTrue(fs.delete(testDirs[1], false)); assertTrue(fs.delete(testDirs[2], false)); - fs.rename(path("a"), path("a3")); + ContractTestUtils.rename(fs, path("a"), path("a3")); + ContractTestUtils.assertPathsDoNotExist(fs, + "Source paths shouldn't exist post rename operation", + testDirs[0], testDirs[1], testDirs[2]); FileStatus[] paths = fs.listStatus(path("a3/b")); List list = new ArrayList<>(); for (FileStatus fileState : paths) { From 5b631fa6ed143cbcf313c59f70d609ca1db2050f Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Thu, 25 Jun 2020 15:37:48 +0530 Subject: [PATCH 5/6] Fixing raw s3 test failures and some checkstyle --- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 14 +++++++------- .../hadoop/fs/s3a/ITestS3AFileOperationCost.java | 4 ++-- .../apache/hadoop/fs/s3a/auth/ITestAssumeRole.java | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index be1d02a6180f7..20cc6645d8186 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4200,9 +4200,7 @@ private RemoteIterator innerListFiles( // fallback to file existence check as the path // can be a file or empty directory. if (!listFilesAssumingDir.hasNext()) { - final S3AFileStatus fileStatus = innerGetFileStatus(path, - false, - StatusProbeEnum.HEAD_OR_DIR_MARKER); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); if (fileStatus.isFile()) { return new Listing.SingleStatusRemoteIterator( toLocatedFileStatus(fileStatus)); @@ -4246,9 +4244,10 @@ private RemoteIterator getListFilesAssumingDir( final PathMetadata pm = metadataStore.get(path, true); if (pm != null) { if (pm.isDeleted()) { - OffsetDateTime deletedAt = OffsetDateTime.ofInstant( - Instant.ofEpochMilli(pm.getFileStatus().getModificationTime()), - ZoneOffset.UTC); + OffsetDateTime deletedAt = OffsetDateTime + .ofInstant(Instant.ofEpochMilli( + pm.getFileStatus().getModificationTime()), + ZoneOffset.UTC); throw new FileNotFoundException("Path " + path + " is recorded as " + "deleted by S3Guard at " + deletedAt); } @@ -4258,7 +4257,8 @@ private RemoteIterator getListFilesAssumingDir( allowAuthoritative); tombstones = metadataStoreListFilesIterator.listTombstones(); // if all of the below is true - // - authoritative access is allowed for this metadatastore for this directory, + // - authoritative access is allowed for this metadatastore + // for this directory, // - all the directory listings are authoritative on the client // - the caller does not force non-authoritative access // return the listing without any further s3 access diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index 60b126868d258..4af2cf106aff4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -234,8 +234,8 @@ public void testCostOfListFilesOnNonExistingDir() throws Throwable { S3AFileSystem fs = getFileSystem(); resetMetricDiffs(); intercept(FileNotFoundException.class, - () -> fs.listFiles(dir, true)); - verifyOperationCount(2, 1); + () -> fs.listFiles(dir, true)); + verifyOperationCount(2, 2); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index d10b62254c381..4f6a1ff417873 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -398,9 +398,9 @@ public void testAssumeRoleRestrictedPolicyFS() throws Exception { forbidden("", () -> fs.listStatus(ROOT)); forbidden("", - () -> fs.listFiles(ROOT, true)); + () -> fs.listFiles(ROOT, true)); forbidden("", - () -> fs.listLocatedStatus(ROOT)); + () -> fs.listLocatedStatus(ROOT)); forbidden("", () -> fs.mkdirs(path("testAssumeRoleFS"))); } From abdbbbb1570e9acc0a29f5b50ca33f856e65c1bd Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Mon, 6 Jul 2020 20:16:59 +0530 Subject: [PATCH 6/6] Reusing passed filestatus --- .../main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 5 ++++- .../org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 20cc6645d8186..bae2dbaab6b3c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4200,7 +4200,10 @@ private RemoteIterator innerListFiles( // fallback to file existence check as the path // can be a file or empty directory. if (!listFilesAssumingDir.hasNext()) { - final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); + // If file status was already passed, reuse it. + final S3AFileStatus fileStatus = status != null + ? status + : (S3AFileStatus) getFileStatus(path); if (fileStatus.isFile()) { return new Listing.SingleStatusRemoteIterator( toLocatedFileStatus(fileStatus)); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index 4af2cf106aff4..d8214cf6b84d0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -234,7 +234,7 @@ public void testCostOfListFilesOnNonExistingDir() throws Throwable { S3AFileSystem fs = getFileSystem(); resetMetricDiffs(); intercept(FileNotFoundException.class, - () -> fs.listFiles(dir, true)); + () -> fs.listFiles(dir, true)); verifyOperationCount(2, 2); }