Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4181,79 +4181,114 @@ private RemoteIterator<S3ALocatedFileStatus> innerListFiles(
Path path = qualify(f);
LOG.debug("listFiles({}, {})", path, recursive);
try {
// if a status was given, that is used, otherwise
// call getFileStatus, which triggers an existence check
final S3AFileStatus fileStatus = status != null
? status
: (S3AFileStatus) getFileStatus(path);
if (fileStatus.isFile()) {
// if a status was given and it is a file.
if (status != null && status.isFile()) {
// simple case: File
LOG.debug("Path is a file");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we are keeping this, include the path f in the log message, just to make more sense of it in a busy log

return new Listing.SingleStatusRemoteIterator(
toLocatedFileStatus(fileStatus));
} else {
// directory: do a bulk operation
String key = maybeAddTrailingSlash(pathToKey(path));
String delimiter = recursive ? null : "/";
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
final RemoteIterator<S3AFileStatus> cachedFilesIterator;
final Set<Path> tombstones;
boolean allowAuthoritative = allowAuthoritative(f);
if (recursive) {
final PathMetadata pm = metadataStore.get(path, true);
// shouldn't need to check pm.isDeleted() because that will have
// been caught by getFileStatus above.
MetadataStoreListFilesIterator metadataStoreListFilesIterator =
new MetadataStoreListFilesIterator(metadataStore, pm,
allowAuthoritative);
tombstones = metadataStoreListFilesIterator.listTombstones();
// if all of the below is true
// - authoritative access is allowed for this metadatastore for this directory,
// - all the directory listings are authoritative on the client
// - the caller does not force non-authoritative access
// return the listing without any further s3 access
if (!forceNonAuthoritativeMS &&
allowAuthoritative &&
metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
metadataStoreListFilesIterator, tombstones);
cachedFilesIterator = listing.createProvidedFileStatusIterator(
statuses, ACCEPT_ALL, acceptor);
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
cachedFilesIterator = metadataStoreListFilesIterator;
} else {
DirListingMetadata meta =
S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
allowAuthoritative);
if (meta != null) {
tombstones = meta.listTombstones();
} else {
tombstones = null;
}
cachedFilesIterator = listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
// metadata listing is authoritative, so return it directly
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
toLocatedFileStatus(status));
}
// Assuming the path to be a directory
// do a bulk operation.
RemoteIterator<S3ALocatedFileStatus> listFilesAssumingDir =
getListFilesAssumingDir(path,
recursive,
acceptor,
collectTombstones,
forceNonAuthoritativeMS);
// If there are no list entries present, we
// fallback to file existence check as the path
// can be a file or empty directory.
if (!listFilesAssumingDir.hasNext()) {
final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

innerGetFileStatus with probes for file and dir marker -HEAD_OR_DIR_MARKER; we don't need to issue a second LIST call, do we? This will save a call on listing a missing path

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't do this as we will need a list for an directory which is empty. Else we will get FNFE.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, but an empty dir is that HEAD marker. So no list, right?

Don't worry about it -my dir marker tuning changes things anyway, replacing the HEAD + / with a list.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there is an empty directory within a base directory. For example directory structure in test ITestS3AContractGetFileStatus>AbstractContractGetFileStatusTest.testListFilesEmptyDirectoryRecursive
There won't be any files thus listing will be empty.

if (fileStatus.isFile()) {
return new Listing.SingleStatusRemoteIterator(
toLocatedFileStatus(fileStatus));
}
return listing.createTombstoneReconcilingIterator(
listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, delimiter),
ACCEPT_ALL,
acceptor,
cachedFilesIterator)),
collectTombstones ? tombstones : null);
}
// If we have reached here, it means either there are files
// in this directory or it is empty.
return listFilesAssumingDir;
} catch (AmazonClientException e) {
// TODO S3Guard: retry on file not found exception
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need to consider this and see if the TODO is already done.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any retries in listFiles().
Even the listLocatedStatus() and listStatus() are called with Invoker.Once(). That means just the calls with exception translated without any retries.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets cut that TODO line

throw translateException("listFiles", path, e);
}
}

/**
* List files under a path assuming the path to be a directory.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i wonder if we can/should move this into the Listing class.

in #2046 I'm looking at doing that for openfile, just to pull something complex out of the main fs. I know it hurts maintenance a bit (all subsequent patches will need this patch before cherry picking), but your work is big enough that will happen anyway.

imagine we made the Listing class a subclass of org.apache.hadoop.fs.s3a.impl.AbstractStoreOperation and added a callback interface purely to those s3a FS calls it needs - just like we do with rename with org.apache.hadoop.fs.s3a.impl.OperationCallbacks. we'd then move all the work over there and isolate it better.

of course, one thing this will make harder is for my directory marker changes, but that shouldn't be a blocker.

* @param path input path.
* @param recursive recursive listing?
* @param acceptor file status filter
* @param collectTombstones should tombstones be collected from S3Guard?
* @param forceNonAuthoritativeMS forces metadata store to act like non
* authoritative. This is useful when
* listFiles output is used by import tool.
* @return an iterator over listing.
* @throws IOException any exception.
*/
private RemoteIterator<S3ALocatedFileStatus> getListFilesAssumingDir(
Path path,
boolean recursive, Listing.FileStatusAcceptor acceptor,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add on a new line

boolean collectTombstones,
boolean forceNonAuthoritativeMS) throws IOException {

String key = maybeAddTrailingSlash(pathToKey(path));
String delimiter = recursive ? null : "/";
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
final RemoteIterator<S3AFileStatus> cachedFilesIterator;
final Set<Path> tombstones;
boolean allowAuthoritative = allowAuthoritative(path);
if (recursive) {
final PathMetadata pm = metadataStore.get(path, true);
// shouldn't need to check pm.isDeleted() because that will have
// been caught by getFileStatus above.
MetadataStoreListFilesIterator metadataStoreListFilesIterator =
new MetadataStoreListFilesIterator(metadataStore, pm,
allowAuthoritative);
tombstones = metadataStoreListFilesIterator.listTombstones();
// if all of the below is true
// - authoritative access is allowed for this metadatastore for this directory,
// - all the directory listings are authoritative on the client
// - the caller does not force non-authoritative access
// return the listing without any further s3 access
if (!forceNonAuthoritativeMS &&
allowAuthoritative &&
metadataStoreListFilesIterator.isRecursivelyAuthoritative()) {
S3AFileStatus[] statuses = S3Guard.iteratorToStatuses(
metadataStoreListFilesIterator, tombstones);
cachedFilesIterator = listing.createProvidedFileStatusIterator(
statuses, ACCEPT_ALL, acceptor);
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
cachedFilesIterator = metadataStoreListFilesIterator;
} else {
DirListingMetadata meta =
S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider,
allowAuthoritative);
if (meta != null) {
tombstones = meta.listTombstones();
} else {
tombstones = null;
}
cachedFilesIterator = listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
// metadata listing is authoritative, so return it directly
return listing.createLocatedFileStatusIterator(cachedFilesIterator);
}
}
return listing.createTombstoneReconcilingIterator(
listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, delimiter),
ACCEPT_ALL,
acceptor,
cachedFilesIterator)),
collectTombstones ? tombstones : null);
}

/**
* Override superclass so as to add statistic collection.
* {@inheritDoc}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> params() {
return Arrays.asList(new Object[][]{
{"raw", false},
// {"raw", false},
{"guarded", true}
});
}
Expand Down Expand Up @@ -168,6 +168,65 @@ public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable {
}
}

@Test
public void testCostOfListFilesOnFile() throws Throwable {
describe("Performing listFiles() on a file");
Path file = path(getMethodName() + ".txt");
S3AFileSystem fs = getFileSystem();
touch(fs, file);
resetMetricDiffs();
fs.listFiles(file, true);
if (!fs.hasMetadataStore()) {
metadataRequests.assertDiffEquals(1);
} else {
if (fs.allowAuthoritative(file)) {
listRequests.assertDiffEquals(0);
} else {
listRequests.assertDiffEquals(1);
}
}
}

@Test
public void testCostOfListFilesOnEmptyDir() throws Throwable {
describe("Performing listFiles() on an empty dir");
Path dir = path(getMethodName());
S3AFileSystem fs = getFileSystem();
fs.mkdirs(dir);
resetMetricDiffs();
fs.listFiles(dir, true);
if (!fs.hasMetadataStore()) {
verifyOperationCount(2, 1);
} else {
if (fs.allowAuthoritative(dir)) {
verifyOperationCount(0, 0);
} else {
verifyOperationCount(0, 1);
}
}
}

@Test
public void testCostOfListFilesOnNonEmptyDir() throws Throwable {
describe("Performing listFiles() on a non empty dir");
Path dir = path(getMethodName());
S3AFileSystem fs = getFileSystem();
fs.mkdirs(dir);
Path file = new Path(dir, "file.txt");
touch(fs, file);
resetMetricDiffs();
fs.listFiles(dir, true);
if (!fs.hasMetadataStore()) {
verifyOperationCount(0, 1);
} else {
if (fs.allowAuthoritative(dir)) {
verifyOperationCount(0, 0);
} else {
verifyOperationCount(0, 1);
}
}
}

@Test
public void testCostOfGetFileStatusOnFile() throws Throwable {
describe("performing getFileStatus on a file");
Expand Down