Skip to content

Commit 027febe

Browse files
HDDS-9846. Datanode should not persist cluster ID to global version file until loading all volumes. (#5757)
1 parent fa9b909 commit 027febe

2 files changed

Lines changed: 33 additions & 4 deletions

File tree

  • hadoop-hdds

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,6 @@ public EndpointStateMachine.EndPointStates call() throws Exception {
7676
// If end point is passive, datanode does not need to check volumes.
7777
String scmId = response.getValue(OzoneConsts.SCM_ID);
7878
String clusterId = response.getValue(OzoneConsts.CLUSTER_ID);
79-
DatanodeLayoutStorage layoutStorage
80-
= new DatanodeLayoutStorage(configuration);
81-
layoutStorage.setClusterId(clusterId);
82-
layoutStorage.persistCurrentState();
8379

8480
Preconditions.checkNotNull(scmId,
8581
"Reply from SCM: scmId cannot be null");
@@ -92,6 +88,11 @@ public EndpointStateMachine.EndPointStates call() throws Exception {
9288
// Check HddsVolumes
9389
checkVolumeSet(ozoneContainer.getVolumeSet(), scmId, clusterId);
9490

91+
DatanodeLayoutStorage layoutStorage
92+
= new DatanodeLayoutStorage(configuration);
93+
layoutStorage.setClusterId(clusterId);
94+
layoutStorage.persistCurrentState();
95+
9596
// Start the container services after getting the version information
9697
ozoneContainer.start(clusterId);
9798
}

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.Map;
2626
import java.util.UUID;
2727

28+
import org.apache.commons.io.FileUtils;
2829
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
2930
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
3031
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -79,6 +80,7 @@
7980
import org.junit.jupiter.api.io.TempDir;
8081
import org.mockito.Mockito;
8182

83+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
8284
import static org.junit.jupiter.api.Assertions.assertEquals;
8385
import static org.mockito.Mockito.mock;
8486
import static org.mockito.Mockito.when;
@@ -285,6 +287,32 @@ public void testDnLayoutVersionFile() throws Exception {
285287
= new DatanodeLayoutStorage(ozoneConf,
286288
"na_expect_storage_initialized");
287289
assertEquals(scmServerImpl.getClusterId(), layout.getClusterID());
290+
291+
// Delete storage volume info
292+
File storageDir = ozoneContainer.getVolumeSet()
293+
.getVolumesList().get(0).getStorageDir();
294+
FileUtils.forceDelete(storageDir);
295+
296+
// Format the volume VERSION file with a clusterId
297+
// that differs from the SCM clusterId.
298+
ozoneContainer.getVolumeSet().getVolumesList()
299+
.get(0).format("different_cluster_id");
300+
// Update layout clusterId and persist it.
301+
layout.setClusterId("different_cluster_id");
302+
layout.persistCurrentState();
303+
304+
// Because the volume-level clusterId does not match the SCM clusterId,
305+
// the datanode layout file should keep its existing clusterID
306+
// even after the version call.
307+
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION);
308+
versionTask.call();
309+
DatanodeLayoutStorage layout1
310+
= new DatanodeLayoutStorage(ozoneConf,
311+
"na_expect_storage_initialized");
312+
313+
assertEquals("different_cluster_id", layout1.getClusterID());
314+
assertNotEquals(scmServerImpl.getClusterId(), layout1.getClusterID());
315+
FileUtils.forceDelete(storageDir);
288316
}
289317
}
290318

0 commit comments

Comments
 (0)