Skip to content

Commit 7558b54

Browse files
committed
YARN-11200 Backport numa to branch-2.10
YARN-11200. Fix Test error YARN-11200. Fix build error fix package info checkstyle error fix package info checkstyle error fix package info checkstyle error add package info adding extra space Trigger jenkins Import correctly in package-info for numa
1 parent 5d96646 commit 7558b54

12 files changed

Lines changed: 1365 additions & 2 deletions

File tree

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3524,6 +3524,22 @@ public static boolean areNodeLabelsEnabled(
35243524
public static final String DEFAULT_YARN_WORKFLOW_ID_TAG_PREFIX =
35253525
"workflowid:";
35263526

3527+
/**
3528+
* Settings for NUMA awareness.
3529+
*/
3530+
public static final String NM_NUMA_AWARENESS_ENABLED = NM_PREFIX
3531+
+ "numa-awareness.enabled";
3532+
public static final boolean DEFAULT_NM_NUMA_AWARENESS_ENABLED = false;
3533+
public static final String NM_NUMA_AWARENESS_READ_TOPOLOGY = NM_PREFIX
3534+
+ "numa-awareness.read-topology";
3535+
public static final boolean DEFAULT_NM_NUMA_AWARENESS_READ_TOPOLOGY = false;
3536+
public static final String NM_NUMA_AWARENESS_NODE_IDS = NM_PREFIX
3537+
+ "numa-awareness.node-ids";
3538+
public static final String NM_NUMA_AWARENESS_NUMACTL_CMD = NM_PREFIX
3539+
+ "numa-awareness.numactl.cmd";
3540+
public static final String DEFAULT_NM_NUMA_AWARENESS_NUMACTL_CMD =
3541+
"/usr/bin/numactl";
3542+
35273543
public YarnConfiguration() {
35283544
super();
35293545
}
@@ -3535,6 +3551,17 @@ public YarnConfiguration(Configuration conf) {
35353551
}
35363552
}
35373553

3554+
/**
3555+
* Returns whether the NUMA awareness is enabled.
3556+
*
3557+
* @param conf the configuration
3558+
* @return whether the NUMA awareness is enabled.
3559+
*/
3560+
public static boolean numaAwarenessEnabled(Configuration conf) {
3561+
return conf.getBoolean(NM_NUMA_AWARENESS_ENABLED,
3562+
DEFAULT_NM_NUMA_AWARENESS_ENABLED);
3563+
}
3564+
35383565
@Private
35393566
public static List<String> getServiceAddressConfKeys(Configuration conf) {
35403567
return useHttps(conf) ? RM_SERVICES_ADDRESS_CONF_KEYS_HTTPS

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3680,4 +3680,56 @@
36803680
<name>yarn.client.load.resource-types.from-server</name>
36813681
<value>false</value>
36823682
</property>
3683+
3684+
<property>
3685+
<description>
3686+
Whether to enable the NUMA awareness for containers in Node Manager.
3687+
</description>
3688+
<name>yarn.nodemanager.numa-awareness.enabled</name>
3689+
<value>false</value>
3690+
</property>
3691+
3692+
<property>
3693+
<description>
3694+
Whether to read the NUMA topology from the system or from the
3695+
configurations. If the value is true then NM reads the NUMA topology from
3696+
system using the command 'numactl --hardware'. If the value is false then NM
3697+
reads the topology from the configurations
3698+
'yarn.nodemanager.numa-awareness.node-ids'(for node IDs),
3699+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.memory'(for each node memory),
3700+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.cpus'(for each node cpus).
3701+
</description>
3702+
<name>yarn.nodemanager.numa-awareness.read-topology</name>
3703+
<value>false</value>
3704+
</property>
3705+
3706+
<property>
3707+
<description>
3708+
NUMA node IDs in the form of a comma-separated list. Memory and number of CPUs
3709+
will be read using the properties
3710+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.memory' and
3711+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.cpus' for each id specified
3712+
in this value. This property value will be read only when
3713+
'yarn.nodemanager.numa-awareness.read-topology=false'.
3714+
3715+
For example, if yarn.nodemanager.numa-awareness.node-ids=0,1
3716+
then memory and cpus need to be specified for node IDs '0' and '1' as below,
3717+
yarn.nodemanager.numa-awareness.0.memory=73717
3718+
yarn.nodemanager.numa-awareness.0.cpus=4
3719+
yarn.nodemanager.numa-awareness.1.memory=73727
3720+
yarn.nodemanager.numa-awareness.1.cpus=4
3721+
</description>
3722+
<name>yarn.nodemanager.numa-awareness.node-ids</name>
3723+
<value></value>
3724+
</property>
3725+
3726+
<property>
3727+
<description>
3728+
The numactl command path which controls NUMA policy for processes or
3729+
shared memory.
3730+
</description>
3731+
<name>yarn.nodemanager.numa-awareness.numactl.cmd</name>
3732+
<value>/usr/bin/numactl</value>
3733+
</property>
3734+
36833735
</configuration>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
111111
private boolean containerLimitUsers;
112112
private ResourceHandler resourceHandlerChain;
113113
private LinuxContainerRuntime linuxContainerRuntime;
114+
private Context nmContext;
114115

115116
/**
116117
* The container exit code.
@@ -262,6 +263,13 @@ protected String getContainerExecutorExecutablePath(Configuration conf) {
262263
defaultPath);
263264
}
264265

266+
private void addNumaArgsToCommand(List<String> prefixCommands,
267+
List<String> numaArgs) {
268+
if (numaArgs != null) {
269+
prefixCommands.addAll(numaArgs);
270+
}
271+
}
272+
265273
/**
266274
* Add a niceness level to the process that will be executed. Adds
267275
* {@code -n <nice>} to the given command. The niceness level will be
@@ -282,7 +290,8 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
282290
}
283291

284292
@Override
285-
public void init(Context nmContext) throws IOException {
293+
public void init(Context context) throws IOException {
294+
this.nmContext = context;
286295
Configuration conf = super.getConf();
287296

288297
// Send command to executor which will just start up,
@@ -475,6 +484,7 @@ public int launchContainer(ContainerStartContext ctx)
475484
container.getResource());
476485
String resourcesOptions = resourcesHandler.getResourcesOption(containerId);
477486
String tcCommandFile = null;
487+
List<String> numaArgs = null;
478488

479489
try {
480490
if (resourceHandlerChain != null) {
@@ -496,6 +506,9 @@ public int launchContainer(ContainerStartContext ctx)
496506
case TC_MODIFY_STATE:
497507
tcCommandFile = op.getArguments().get(0);
498508
break;
509+
case ADD_NUMA_PARAMS:
510+
numaArgs = op.getArguments();
511+
break;
499512
default:
500513
LOG.warn("PrivilegedOperation type unsupported in launch: "
501514
+ op.getOperationType());
@@ -529,6 +542,7 @@ public int launchContainer(ContainerStartContext ctx)
529542
.Builder(container);
530543

531544
addSchedPriorityCommand(prefixCommands);
545+
addNumaArgsToCommand(prefixCommands, numaArgs);
532546
if (prefixCommands.size() > 0) {
533547
builder.setExecutionAttribute(CONTAINER_LAUNCH_PREFIX_COMMANDS,
534548
prefixCommands);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ public enum OperationType {
5252
ADD_PID_TO_CGROUP(""), //no CLI switch supported yet.
5353
RUN_DOCKER_CMD("--run-docker"),
5454
GPU("--module-gpu"),
55-
LIST_AS_USER(""); //no CLI switch supported yet.
55+
LIST_AS_USER(""), //no CLI switch supported yet.
56+
ADD_NUMA_PARAMS(""); // no CLI switch supported yet.
5657

5758
private final String option;
5859

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.hadoop.yarn.conf.YarnConfiguration;
2828
import org.apache.hadoop.yarn.server.nodemanager.Context;
2929
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
30+
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.numa.NumaResourceHandlerImpl;
3031
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
3132
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
3233
import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler;
@@ -208,6 +209,14 @@ private static void addHandlerIfNotNull(List<ResourceHandler> handlerList,
208209
}
209210
}
210211

212+
private static ResourceHandler getNumaResourceHandler(Configuration conf,
213+
Context nmContext) {
214+
if (YarnConfiguration.numaAwarenessEnabled(conf)) {
215+
return new NumaResourceHandlerImpl(conf, nmContext);
216+
}
217+
return null;
218+
}
219+
211220
private static void initializeConfiguredResourceHandlerChain(
212221
Configuration conf, Context nmContext)
213222
throws ResourceHandlerException {
@@ -218,6 +227,7 @@ private static void initializeConfiguredResourceHandlerChain(
218227
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
219228
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
220229
addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
230+
addHandlerIfNotNull(handlerList, getNumaResourceHandler(conf, nmContext));
221231
resourceHandlerChain = new ResourceHandlerChain(handlerList);
222232
}
223233

0 commit comments

Comments
 (0)