Skip to content

Commit 9b9375c

Browse files
committed
HADOOP-18425. ABFS rename resilience through etags
If "fs.azure.enable.rename.resilience" is true, then do a HEAD of the source file before the rename to obtain its etag; that etag can then be used to recover from a rename failure, as the manifest committer does (HADOOP-18163).

Change-Id: Ia417f1501f7274662eb9ff919c6378fb913b476b
1 parent 759ddeb commit 9b9375c

5 files changed

Lines changed: 40 additions & 7 deletions

File tree

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ public class AbfsConfiguration{
328328
FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR)
329329
private boolean enableAbfsListIterator;
330330

331+
@BooleanConfigurationValidatorAnnotation(ConfigurationKey =
332+
FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE)
333+
private boolean renameResilience;
334+
331335
public AbfsConfiguration(final Configuration rawConfig, String accountName)
332336
throws IllegalAccessException, InvalidConfigurationValueException, IOException {
333337
this.rawConfig = ProviderUtils.excludeIncompatibleCredentialProviders(
@@ -1130,4 +1134,7 @@ public void setEnableAbfsListIterator(boolean enableAbfsListIterator) {
11301134
this.enableAbfsListIterator = enableAbfsListIterator;
11311135
}
11321136

1137+
public boolean getRenameResilience() {
1138+
return renameResilience;
1139+
}
11331140
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
import javax.annotation.Nullable;
4747

4848
import org.apache.hadoop.classification.VisibleForTesting;
49+
import org.apache.hadoop.fs.EtagSource;
50+
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
51+
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation;
4952
import org.apache.hadoop.util.Preconditions;
5053
import org.slf4j.Logger;
5154
import org.slf4j.LoggerFactory;
@@ -154,6 +157,11 @@ public class AzureBlobFileSystem extends FileSystem
154157
/** Rate limiting for operations which use it to throttle their IO. */
155158
private RateLimiting rateLimiting;
156159

160+
/**
161+
* Enable resilient rename.
162+
*/
163+
private boolean renameResilience;
164+
157165
@Override
158166
public void initialize(URI uri, Configuration configuration)
159167
throws IOException {
@@ -226,6 +234,8 @@ public void initialize(URI uri, Configuration configuration)
226234
}
227235

228236
rateLimiting = RateLimitingFactory.create(abfsConfiguration.getRateLimit());
237+
238+
renameResilience = abfsConfiguration.getRenameResilience();
229239
LOG.debug("Initializing AzureBlobFileSystem for {} complete", uri);
230240
}
231241

@@ -446,6 +456,8 @@ public boolean rename(final Path src, final Path dst) throws IOException {
446456
dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext);
447457
}
448458

459+
FileStatus sourceFileStatus = null;
460+
449461
try {
450462
String sourceFileName = src.getName();
451463
Path adjustedDst = dst;
@@ -459,10 +471,22 @@ public boolean rename(final Path src, final Path dst) throws IOException {
459471

460472
qualifiedDstPath = makeQualified(adjustedDst);
461473

462-
abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext, null);
474+
String etag = null;
475+
if (renameResilience) {
476+
sourceFileStatus = abfsStore.getFileStatus(qualifiedSrcPath, tracingContext);
477+
etag = ((EtagSource) sourceFileStatus).getEtag();
478+
}
479+
boolean recovered = abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext,
480+
etag);
481+
if (recovered) {
482+
LOG.info("Recovered from rename failure of {} to {}",
483+
qualifiedSrcPath, qualifiedDstPath);
484+
}
463485
return true;
464486
} catch (AzureBlobFileSystemException ex) {
465-
LOG.debug("Rename operation failed. ", ex);
487+
LOG.debug("Rename {} to {} failed. source {} dest {}",
488+
qualifiedSrcPath, qualifiedDstPath,
489+
sourceFileStatus, dstFileStatus, ex);
466490
checkException(
467491
src,
468492
ex,

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ public final class ConfigurationKeys {
233233
/** Key for rate limit capacity, as used by IO operations which try to throttle themselves. */
234234
public static final String FS_AZURE_ABFS_IO_RATE_LIMIT = "fs.azure.io.rate.limit";
235235

236+
/** Add extra resilience to rename failures, at the expense of performance. */
237+
public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience";
238+
236239
public static String accountProperty(String property, String account) {
237240
return property + "." + account;
238241
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ public final class FileSystemConfigurations {
118118

119119
public static final int STREAM_ID_LEN = 12;
120120
public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true;
121+
public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true;
121122

122123
/**
123124
* Limit of queued block upload operations before writes

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ public AbfsClientRenameResult renamePath(
560560
if (!op.hasResult()) {
561561
throw e;
562562
}
563+
LOG.debug("Rename of {} to {} failed, attempting recovery", source, destination, e);
563564

564565
// ref: HADOOP-18242. Rename failure occurring due to a rare case of
565566
// tracking metadata being in incomplete state.
@@ -574,18 +575,15 @@ public AbfsClientRenameResult renamePath(
574575
// then we can retry the rename operation.
575576
AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
576577
tracingContext);
577-
isMetadataIncompleteState = true;
578578
// Extract the sourceEtag, using the status Op, and set it
579579
// for future rename recovery.
580580
AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
581581
String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
582582
renamePath(source, destination, continuation, tracingContext,
583-
sourceEtagAfterFailure, isMetadataIncompleteState);
583+
sourceEtagAfterFailure, true);
584584
}
585585
// if we get out of the condition without a successful rename, then
586586
// it isn't metadata incomplete state issue.
587-
isMetadataIncompleteState = false;
588-
589587
boolean etagCheckSucceeded = renameIdempotencyCheckOp(
590588
source,
591589
sourceEtag, op, destination, tracingContext);
@@ -594,7 +592,7 @@ public AbfsClientRenameResult renamePath(
594592
// throw back the exception
595593
throw e;
596594
}
597-
return new AbfsClientRenameResult(op, true, isMetadataIncompleteState);
595+
return new AbfsClientRenameResult(op, true, false);
598596
}
599597
}
600598

0 commit comments

Comments
 (0)