Skip to content

Commit 1d70c8c

Browse files
swagleHanisha Koneru
authored andcommitted
HDDS-1464. Client should have different retry policies for different exceptions. (#785)
1 parent 69b903b commit 1d70c8c

2 files changed

Lines changed: 52 additions & 19 deletions

File tree

hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,29 @@
1717
*/
1818
package org.apache.hadoop.ozone.client;
1919

20+
import java.util.ArrayList;
21+
import java.util.HashMap;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.concurrent.TimeUnit;
25+
import java.util.concurrent.TimeoutException;
26+
2027
import org.apache.hadoop.hdds.client.OzoneQuota;
2128
import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
2229
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
2330
import org.apache.hadoop.io.retry.RetryPolicies;
2431
import org.apache.hadoop.io.retry.RetryPolicy;
2532
import org.apache.hadoop.ozone.OzoneConsts;
26-
import org.apache.hadoop.ozone.client.rest.response.*;
33+
import org.apache.hadoop.ozone.client.rest.response.BucketInfo;
34+
import org.apache.hadoop.ozone.client.rest.response.KeyInfo;
35+
import org.apache.hadoop.ozone.client.rest.response.KeyInfoDetails;
36+
import org.apache.hadoop.ozone.client.rest.response.KeyLocation;
37+
import org.apache.hadoop.ozone.client.rest.response.VolumeInfo;
38+
import org.apache.hadoop.ozone.client.rest.response.VolumeOwner;
2739
import org.apache.ratis.protocol.AlreadyClosedException;
2840
import org.apache.ratis.protocol.GroupMismatchException;
2941
import org.apache.ratis.protocol.RaftRetryFailureException;
3042

31-
import java.util.ArrayList;
32-
import java.util.List;
33-
import java.util.concurrent.TimeUnit;
34-
import java.util.concurrent.TimeoutException;
35-
3643
/** A utility class for OzoneClient. */
3744
public final class OzoneClientUtils {
3845

@@ -129,14 +136,31 @@ public static KeyInfoDetails asKeyInfoDetails(OzoneKeyDetails key) {
129136

130137
public static RetryPolicy createRetryPolicy(int maxRetryCount,
131138
long retryInterval) {
132-
// just retry without sleep
133-
RetryPolicy retryPolicy = RetryPolicies
134-
.retryUpToMaximumCountWithFixedSleep(maxRetryCount, retryInterval,
135-
TimeUnit.MILLISECONDS);
136-
return retryPolicy;
139+
// retry with fixed sleep between retries
140+
return RetryPolicies.retryUpToMaximumCountWithFixedSleep(
141+
maxRetryCount, retryInterval, TimeUnit.MILLISECONDS);
137142
}
138143

139144
public static List<Class<? extends Exception>> getExceptionList() {
140145
return EXCEPTION_LIST;
141146
}
147+
148+
public static Map<Class<? extends Throwable>, RetryPolicy>
149+
getRetryPolicyByException(int maxRetryCount, long retryInterval) {
150+
Map<Class<? extends Throwable>, RetryPolicy> policyMap = new HashMap<>();
151+
for (Class<? extends Exception> ex : EXCEPTION_LIST) {
152+
if (ex == TimeoutException.class ||
153+
ex == RaftRetryFailureException.class) {
154+
// retry without sleep
155+
policyMap.put(ex, createRetryPolicy(maxRetryCount, 0));
156+
} else {
157+
// retry with fixed sleep between retries
158+
policyMap.put(ex, createRetryPolicy(maxRetryCount, retryInterval));
159+
}
160+
}
161+
// Default retry policy
162+
policyMap.put(Exception.class, createRetryPolicy(
163+
maxRetryCount, retryInterval));
164+
return policyMap;
165+
}
142166
}

hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
import org.apache.hadoop.fs.FSExceptionMessages;
2323
import org.apache.hadoop.fs.FileEncryptionInfo;
2424
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
25-
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
26-
.ChecksumType;
25+
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
2726
import org.apache.hadoop.hdds.scm.container.ContainerID;
2827
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
2928
import org.apache.hadoop.hdds.scm.storage.BufferPool;
@@ -52,7 +51,10 @@
5251
import java.util.List;
5352
import java.util.Collection;
5453
import java.util.ListIterator;
54+
import java.util.Map;
5555
import java.util.concurrent.TimeoutException;
56+
import java.util.function.Function;
57+
import java.util.stream.Collectors;
5658

5759
/**
5860
* Maintaining a list of BlockInputStream. Write based on offset.
@@ -95,7 +97,7 @@ enum StreamAction {
9597
private OmMultipartCommitUploadPartInfo commitUploadPartInfo;
9698
private FileEncryptionInfo feInfo;
9799
private ExcludeList excludeList;
98-
private final RetryPolicy retryPolicy;
100+
private final Map<Class<? extends Throwable>, RetryPolicy> retryPolicyMap;
99101
private int retryCount;
100102
private long offset;
101103
/**
@@ -121,7 +123,10 @@ public KeyOutputStream() {
121123
OzoneConfigKeys.OZONE_CLIENT_CHECKSUM_TYPE_DEFAULT);
122124
this.bytesPerChecksum = OzoneConfigKeys
123125
.OZONE_CLIENT_BYTES_PER_CHECKSUM_DEFAULT_BYTES; // Default is 1MB
124-
this.retryPolicy = RetryPolicies.TRY_ONCE_THEN_FAIL;
126+
this.retryPolicyMap = OzoneClientUtils.getExceptionList()
127+
.stream()
128+
.collect(Collectors.toMap(Function.identity(),
129+
e -> RetryPolicies.TRY_ONCE_THEN_FAIL));
125130
retryCount = 0;
126131
offset = 0;
127132
}
@@ -200,8 +205,8 @@ public KeyOutputStream(OpenKeySession handler,
200205
this.bufferPool =
201206
new BufferPool(chunkSize, (int)streamBufferMaxSize / chunkSize);
202207
this.excludeList = new ExcludeList();
203-
this.retryPolicy = OzoneClientUtils.createRetryPolicy(maxRetryCount,
204-
retryInterval);
208+
this.retryPolicyMap = OzoneClientUtils.getRetryPolicyByException(
209+
maxRetryCount, retryInterval);
205210
this.retryCount = 0;
206211
}
207212

@@ -502,10 +507,14 @@ private void markStreamClosed() {
502507
}
503508

504509
private void handleRetry(IOException exception, long len) throws IOException {
510+
RetryPolicy retryPolicy =
511+
retryPolicyMap.get(checkForException(exception).getClass());
512+
if (retryPolicy == null) {
513+
retryPolicy = retryPolicyMap.get(Exception.class);
514+
}
505515
RetryPolicy.RetryAction action;
506516
try {
507-
action = retryPolicy
508-
.shouldRetry(exception, retryCount, 0, true);
517+
action = retryPolicy.shouldRetry(exception, retryCount, 0, true);
509518
} catch (Exception e) {
510519
throw e instanceof IOException ? (IOException) e : new IOException(e);
511520
}

0 commit comments

Comments
 (0)