Skip to content

Commit 7457e09

Browse files
committed
Address review comments, add node transition resource accounting
1 parent 2ee6bf8 commit 7457e09

File tree

3 files changed

+89
-4
lines changed

3 files changed

+89
-4
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,11 @@ public interface RMNode {
106106
public Resource getTotalCapability();
107107

108108
/**
109-
* the total allocated resources to containers.
110-
* @return the total allocated resources.
109+
* The total resources allocated to containers.
110+
* This is the sum of the resources of all Guaranteed and Opportunistic
111+
* containers that are queued, running, or paused on the node.
112+
* @return the total allocated resources, including all Guaranteed and
113+
* Opportunistic containers in queued, running and paused states.
111114
*/
112115
default Resource getAllocatedContainerResource() {
113116
return Resources.none();

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -959,13 +959,22 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) {
959959
ClusterMetrics.getMetrics().decrDecommisionedNMs();
960960
}
961961
containers = startEvent.getNMContainerStatuses();
962+
final Resource allocatedResource = Resource.newInstance(
963+
Resources.none());
962964
if (containers != null && !containers.isEmpty()) {
963965
for (NMContainerStatus container : containers) {
964-
if (container.getContainerState() == ContainerState.RUNNING) {
965-
rmNode.launchedContainers.add(container.getContainerId());
966+
if (container.getContainerState() == ContainerState.NEW ||
967+
container.getContainerState() == ContainerState.RUNNING) {
968+
Resources.addTo(allocatedResource,
969+
container.getAllocatedResource());
970+
if (container.getContainerState() == ContainerState.RUNNING) {
971+
rmNode.launchedContainers.add(container.getContainerId());
972+
}
966973
}
967974
}
968975
}
976+
977+
rmNode.allocatedContainerResource = allocatedResource;
969978
}
970979

971980
if (null != startEvent.getRunningApplications()) {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,14 @@
4646
import org.apache.hadoop.yarn.api.records.ExecutionType;
4747
import org.apache.hadoop.yarn.api.records.NodeId;
4848
import org.apache.hadoop.yarn.api.records.NodeState;
49+
import org.apache.hadoop.yarn.api.records.Priority;
4950
import org.apache.hadoop.yarn.api.records.Resource;
5051
import org.apache.hadoop.yarn.api.records.ResourceOption;
5152
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
5253
import org.apache.hadoop.yarn.event.EventHandler;
5354
import org.apache.hadoop.yarn.event.InlineDispatcher;
55+
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
56+
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
5457
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
5558
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
5659
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
@@ -251,6 +254,14 @@ private static ContainerStatus getMockContainerStatus(
251254
return containerStatus;
252255
}
253256

257+
private static NMContainerStatus createNMContainerStatus(
258+
final ContainerId containerId, final ExecutionType executionType,
259+
final ContainerState containerState, final Resource capability) {
260+
return NMContainerStatus.newInstance(containerId, 0, containerState,
261+
capability, "", 0, Priority.newInstance(0), 0,
262+
CommonNodeLabelsManager.NO_LABEL, executionType, -1);
263+
}
264+
254265
@Test (timeout = 5000)
255266
public void testExpiredContainer() {
256267
NodeStatus mockNodeStatus = createMockNodeStatus();
@@ -375,6 +386,68 @@ public void testContainerUpdate() throws InterruptedException{
375386
.getContainerId());
376387
}
377388

389+
/**
390+
* Tests that allocated resources are counted correctly on new nodes
391+
* that are added to the cluster.
392+
*/
393+
@Test
394+
public void testAddWithAllocatedContainers() {
395+
NodeStatus mockNodeStatus = createMockNodeStatus();
396+
RMNodeImpl node = getNewNode();
397+
ApplicationId app0 = BuilderUtils.newApplicationId(0, 0);
398+
399+
// Independently computed expected allocated resource to verify against
400+
final Resource expectedResource = Resource.newInstance(Resources.none());
401+
402+
// Guaranteed containers
403+
final ContainerId newContainerId = BuilderUtils.newContainerId(
404+
BuilderUtils.newApplicationAttemptId(app0, 0), 0);
405+
final Resource newContainerCapability =
406+
Resource.newInstance(100, 1);
407+
Resources.addTo(expectedResource, newContainerCapability);
408+
final NMContainerStatus newContainerStatus = createNMContainerStatus(
409+
newContainerId, ExecutionType.GUARANTEED,
410+
ContainerState.NEW, newContainerCapability);
411+
412+
final ContainerId runningContainerId = BuilderUtils.newContainerId(
413+
BuilderUtils.newApplicationAttemptId(app0, 0), 1);
414+
final Resource runningContainerCapability =
415+
Resource.newInstance(200, 2);
416+
Resources.addTo(expectedResource, runningContainerCapability);
417+
final NMContainerStatus runningContainerStatus = createNMContainerStatus(
418+
runningContainerId, ExecutionType.GUARANTEED,
419+
ContainerState.RUNNING, runningContainerCapability);
420+
421+
// Opportunistic containers
422+
final ContainerId newOppContainerId = BuilderUtils.newContainerId(
423+
BuilderUtils.newApplicationAttemptId(app0, 0), 2);
424+
final Resource newOppContainerCapability =
425+
Resource.newInstance(300, 3);
426+
Resources.addTo(expectedResource, newOppContainerCapability);
427+
final NMContainerStatus newOppContainerStatus = createNMContainerStatus(
428+
newOppContainerId, ExecutionType.OPPORTUNISTIC,
429+
ContainerState.NEW, newOppContainerCapability);
430+
431+
final ContainerId runningOppContainerId = BuilderUtils.newContainerId(
432+
BuilderUtils.newApplicationAttemptId(app0, 0), 3);
433+
final Resource runningOppContainerCapability =
434+
Resource.newInstance(400, 4);
435+
Resources.addTo(expectedResource, runningOppContainerCapability);
436+
final NMContainerStatus runningOppContainerStatus = createNMContainerStatus(
437+
runningOppContainerId, ExecutionType.OPPORTUNISTIC,
438+
ContainerState.RUNNING, runningOppContainerCapability);
439+
440+
node.handle(new RMNodeStartedEvent(node.getNodeID(),
441+
Arrays.asList(newContainerStatus, runningContainerStatus,
442+
newOppContainerStatus, runningOppContainerStatus),
443+
null, mockNodeStatus));
444+
Assert.assertEquals(NodeState.RUNNING, node.getState());
445+
Assert.assertNotNull(nodesListManagerEvent);
446+
Assert.assertEquals(NodesListManagerEventType.NODE_USABLE,
447+
nodesListManagerEvent.getType());
448+
Assert.assertEquals(expectedResource, node.getAllocatedContainerResource());
449+
}
450+
378451
/**
379452
* Tests that allocated container resources are counted correctly in
380453
* {@link org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode}

0 commit comments

Comments
 (0)