Skip to content

Commit a432438

Browse files
HDDS-7210. Missing open containers show up as "Closing" on the container report. (#4207)
* HDDS-7210 Enable HealthState calculation when LifeCycleState is CLOSING or QUASI_CLOSED
* HDDS-7210 Recalculate MISSING HealthState when LifeCycleState is CLOSING
* HDDS-7210 Fix StyleCheck problems and add unit test
* HDDS-7210 Execute the same code as for CLOSED container
* HDDS-7210 Fix StyleCheck problems
* HDDS-7210 Write safe setHealthStateForClosing function
* HDDS-7210 Update Style
* HDDS-7210 Update condition
1 parent a01676a commit a432438

2 files changed

Lines changed: 69 additions & 0 deletions

File tree

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ protected void processContainer(ContainerInfo container,
428428
* we have to resend close container command to the datanodes.
429429
*/
430430
if (state == LifeCycleState.CLOSING) {
431+
setHealthStateForClosing(replicas, container, report);
431432
for (ContainerReplica replica: replicas) {
432433
if (replica.getState() != State.UNHEALTHY) {
433434
sendCloseCommand(
@@ -1613,6 +1614,18 @@ private boolean isOpenContainerHealthy(
16131614
.allMatch(r -> compareState(state, r.getState()));
16141615
}
16151616

1617+
private void setHealthStateForClosing(Set<ContainerReplica> replicas,
1618+
ContainerInfo container,
1619+
ReplicationManagerReport report) {
1620+
if (replicas.size() == 0) {
1621+
report.incrementAndSample(HealthState.MISSING, container.containerID());
1622+
report.incrementAndSample(HealthState.UNDER_REPLICATED,
1623+
container.containerID());
1624+
report.incrementAndSample(HealthState.MIS_REPLICATED,
1625+
container.containerID());
1626+
}
1627+
}
1628+
16161629
public boolean isContainerReplicatingOrDeleting(ContainerID containerID) {
16171630
return inflightReplication.containsKey(containerID) ||
16181631
inflightDeletion.containsKey(containerID);

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,62 @@ public void testClosingContainer() throws IOException, TimeoutException {
400400
Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSING));
401401
}
402402

403+
/**
404+
* Create closing container with 1 replica.
405+
* Expectation: Missing containers 0.
406+
* Remove the only replica.
407+
* Expectation: Missing containers 1.
408+
*/
409+
@Test
410+
public void testClosingMissingContainer()
411+
throws IOException, TimeoutException {
412+
final ContainerInfo container = getContainer(LifeCycleState.CLOSING);
413+
final ContainerID id = container.containerID();
414+
415+
containerStateManager.addContainer(container.getProtobuf());
416+
417+
// One replica in OPEN state
418+
final Set<ContainerReplica> replicas = getReplicas(id, State.OPEN,
419+
randomDatanodeDetails());
420+
421+
for (ContainerReplica replica : replicas) {
422+
containerStateManager.updateContainerReplica(id, replica);
423+
}
424+
425+
final int currentCloseCommandCount = datanodeCommandHandler
426+
.getInvocationCount(SCMCommandProto.Type.closeContainerCommand);
427+
428+
replicationManager.processAll();
429+
eventQueue.processAll(1000);
430+
Assertions.assertEquals(currentCloseCommandCount + 1,
431+
datanodeCommandHandler.getInvocationCount(
432+
SCMCommandProto.Type.closeContainerCommand));
433+
434+
ReplicationManagerReport report = replicationManager.getContainerReport();
435+
Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSING));
436+
Assertions.assertEquals(0, report.getStat(
437+
ReplicationManagerReport.HealthState.MISSING));
438+
439+
for (ContainerReplica replica : replicas) {
440+
containerStateManager.removeContainerReplica(id, replica);
441+
}
442+
443+
replicationManager.processAll();
444+
eventQueue.processAll(1000);
445+
Assertions.assertEquals(currentCloseCommandCount + 1,
446+
datanodeCommandHandler.getInvocationCount(
447+
SCMCommandProto.Type.closeContainerCommand));
448+
449+
report = replicationManager.getContainerReport();
450+
Assertions.assertEquals(1, report.getStat(LifeCycleState.CLOSING));
451+
Assertions.assertEquals(1, report.getStat(
452+
ReplicationManagerReport.HealthState.MISSING));
453+
Assertions.assertEquals(1, report.getStat(
454+
ReplicationManagerReport.HealthState.UNDER_REPLICATED));
455+
Assertions.assertEquals(1, report.getStat(
456+
ReplicationManagerReport.HealthState.MIS_REPLICATED));
457+
}
458+
403459
@Test
404460
public void testReplicateCommandTimeout()
405461
throws IOException, TimeoutException {

0 commit comments

Comments
 (0)