Skip to content

Commit 66bd168

Browse files
vivekratnavel authored and hanishakoneru committed
HDDS-2107. Datanodes should retry forever to connect to SCM in an unsecure environment (#1424)
1 parent 4222b62 commit 66bd168

2 files changed

Lines changed: 12 additions & 5 deletions

File tree

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@
1818

1919
import org.apache.hadoop.conf.Configuration;
2020
import org.apache.hadoop.io.IOUtils;
21+
import org.apache.hadoop.io.retry.RetryPolicies;
22+
import org.apache.hadoop.io.retry.RetryPolicy;
2123
import org.apache.hadoop.ipc.ProtobufRpcEngine;
2224
import org.apache.hadoop.ipc.RPC;
2325
import org.apache.hadoop.metrics2.util.MBeans;
@@ -38,6 +40,7 @@
3840
import java.util.HashMap;
3941
import java.util.List;
4042
import java.util.Map;
43+
import java.util.concurrent.TimeUnit;
4144
import java.util.concurrent.locks.ReadWriteLock;
4245
import java.util.concurrent.locks.ReentrantReadWriteLock;
4346

@@ -139,10 +142,14 @@ public void addSCMServer(InetSocketAddress address) throws IOException {
139142
long version =
140143
RPC.getProtocolVersion(StorageContainerDatanodeProtocolPB.class);
141144

142-
StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProxy(
145+
RetryPolicy retryPolicy =
146+
RetryPolicies.retryForeverWithFixedSleep(
147+
1000, TimeUnit.MILLISECONDS);
148+
StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProtocolProxy(
143149
StorageContainerDatanodeProtocolPB.class, version,
144150
address, UserGroupInformation.getCurrentUser(), conf,
145-
NetUtils.getDefaultSocketFactory(conf), getRpcTimeout());
151+
NetUtils.getDefaultSocketFactory(conf), getRpcTimeout(),
152+
retryPolicy).getProxy();
146153

147154
StorageContainerDatanodeProtocolClientSideTranslatorPB rpcClient =
148155
new StorageContainerDatanodeProtocolClientSideTranslatorPB(rpcProxy);

hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@ public void stop() {
119119
* transactions or not.
120120
* @throws Exception
121121
*/
122-
@Test(timeout = 300_000)
122+
@Test(timeout = 500_000)
123123
public void testDoubleBuffer() throws Exception {
124124
// This test checks whether count in tables are correct or not.
125125
testDoubleBuffer(1, 10);
@@ -397,7 +397,7 @@ public void testDoubleBuffer(int iterations, int bucketCount)
397397
}
398398
return count == iterations;
399399

400-
}, 300, 40000);
400+
}, 300, 300000);
401401

402402

403403
GenericTestUtils.waitFor(() -> {
@@ -409,7 +409,7 @@ public void testDoubleBuffer(int iterations, int bucketCount)
409409
fail("testDoubleBuffer failed");
410410
}
411411
return count == bucketCount * iterations;
412-
}, 300, 40000);
412+
}, 300, 300000);
413413

414414
Assert.assertTrue(doubleBuffer.getFlushIterations() > 0);
415415
} finally {

0 commit comments

Comments
 (0)