Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public final class RouterMetrics {
private MutableGaugeInt numAppAttemptsFailedRetrieved;
@Metric("# of getClusterMetrics failed to be retrieved")
private MutableGaugeInt numGetClusterMetricsFailedRetrieved;
@Metric("# of getClusterNodes failed to be retrieved")
private MutableGaugeInt numGetClusterNodesFailedRetrieved;

// Aggregate metrics are shared, and don't have to be looked up per call
@Metric("Total number of successful Submitted apps and latency(ms)")
Expand All @@ -74,7 +76,9 @@ public final class RouterMetrics {
@Metric("Total number of successful Retrieved getClusterMetrics and "
+ "latency(ms)")
private MutableRate totalSucceededGetClusterMetricsRetrieved;

@Metric("Total number of successful Retrieved getClusterNodes and "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think with the new checkstyle, you could make this one line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be a line indeed, I'll fix it.

+ "latency(ms)")
private MutableRate totalSucceededGetClusterNodesRetrieved;

/**
* Provide quantile counters for all latencies.
Expand All @@ -86,6 +90,7 @@ public final class RouterMetrics {
private MutableQuantiles getApplicationsReportLatency;
private MutableQuantiles getApplicationAttemptReportLatency;
private MutableQuantiles getClusterMetricsLatency;
private MutableQuantiles getClusterNodesLatency;

private static volatile RouterMetrics INSTANCE = null;
private static MetricsRegistry registry;
Expand All @@ -112,6 +117,10 @@ private RouterMetrics() {
getClusterMetricsLatency =
registry.newQuantiles("getClusterMetricsLatency",
"latency of get cluster metrics", "ops", "latency", 10);

getClusterNodesLatency =
registry.newQuantiles("getClusterNodesLatency",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yetus seems not to be running the checkstyle.
Can you run it by hand and fix indentation?

CC @ayushtkn any idea on why the checkstyle is not being ran?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no problem, I'll fix the indentation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, the Yetus report is not showing and is giving a +1.

"latency of get cluster nodes", "ops", "latency", 10);
}

public static RouterMetrics getMetrics() {
Expand Down Expand Up @@ -168,6 +177,11 @@ public long getNumSucceededGetClusterMetricsRetrieved(){
return totalSucceededGetClusterMetricsRetrieved.lastStat().numSamples();
}

@VisibleForTesting
public long getNumSucceededGetClusterNodesRetrieved(){
return totalSucceededGetClusterNodesRetrieved.lastStat().numSamples();
}

@VisibleForTesting
public double getLatencySucceededAppsCreated() {
return totalSucceededAppsCreated.lastStat().mean();
Expand Down Expand Up @@ -203,6 +217,11 @@ public double getLatencySucceededGetClusterMetricsRetrieved() {
return totalSucceededGetClusterMetricsRetrieved.lastStat().mean();
}

@VisibleForTesting
public double getLatencySucceededGetClusterNodesRetrieved() {
return totalSucceededGetClusterNodesRetrieved.lastStat().mean();
}

@VisibleForTesting
public int getAppsFailedCreated() {
return numAppsFailedCreated.value();
Expand Down Expand Up @@ -273,6 +292,11 @@ public void succeededGetClusterMetricsRetrieved(long duration) {
getClusterMetricsLatency.add(duration);
}

public void succeededGetClusterNodesRetrieved(long duration) {
totalSucceededGetClusterNodesRetrieved.add(duration);
getClusterNodesLatency.add(duration);
}

public void incrAppsFailedCreated() {
numAppsFailedCreated.incr();
}
Expand Down Expand Up @@ -301,4 +325,7 @@ public void incrGetClusterMetricsFailedRetrieved() {
numGetClusterMetricsFailedRetrieved.incr();
}

public void incrClusterNodesFailedRetrieved() {
numGetClusterNodesFailedRetrieved.incr();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hadoop.yarn.server.router.clientrm;

import org.apache.hadoop.thirdparty.com.google.common.collect.Maps;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.IOException;
import java.lang.reflect.Method;
Expand Down Expand Up @@ -790,8 +791,32 @@ <R> Map<SubClusterId, R> invokeConcurrent(Collection<SubClusterId> clusterIds,

@Override
public GetClusterNodesResponse getClusterNodes(GetClusterNodesRequest request)
throws YarnException, IOException {
throw new NotImplementedException("Code is not implemented");
throws YarnException, IOException {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I will fix it

if (request == null) {
routerMetrics.incrClusterNodesFailedRetrieved();
RouterServerUtil.logAndThrowException(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I will fix it.

"Missing getClusterNodes request.", null);
}
long startTime = clock.getTime();
Map<SubClusterId, SubClusterInfo> subclusters =
federationFacade.getSubClusters(true);
Map<SubClusterId, GetClusterNodesResponse> clusterNodes = Maps.newHashMap();
for (Map.Entry<SubClusterId, SubClusterInfo> entry : subclusters.entrySet()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't use the values. We shouldn't use entrySet() but keys()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you, good advice, keysets should be used.

SubClusterId subClusterId = entry.getKey();
ApplicationClientProtocol client = null;
try {
client = getClientRMProxyForSubCluster(subClusterId);
GetClusterNodesResponse response = client.getClusterNodes(request);
clusterNodes.put(subClusterId, response);
} catch (Exception ex) {
routerMetrics.incrClusterNodesFailedRetrieved();
LOG.error("Unable to get cluster nodes due to exception.", ex);
}
}
long stopTime = clock.getTime();
routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime);
// Merge the NodesResponse
return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes.values());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,14 @@
*/
package org.apache.hadoop.yarn.server.router.clientrm;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid messing too much with the imports.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the reminder, I'll fix it.


import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.server.uam.UnmanagedApplicationManager;
import org.apache.hadoop.yarn.util.Records;
import org.apache.hadoop.yarn.util.resource.Resources;

/**
Expand Down Expand Up @@ -194,4 +191,24 @@ private static boolean mergeUamToReport(String appName,
return !(appName.startsWith(UnmanagedApplicationManager.APP_NAME) ||
appName.startsWith(PARTIAL_REPORT));
}

/**
* Merges a list of GetClusterNodesResponse.
*
* @param responses a list of GetClusterNodesResponse to merge
* @return the merged GetClusterNodesResponse
*/
public static GetClusterNodesResponse mergeClusterNodesResponse(
Collection<GetClusterNodesResponse> responses) {
GetClusterNodesResponse clusterNodesResponse = Records.newRecord(
GetClusterNodesResponse.class);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check the checkstyle report from Yetus.

Copy link
Contributor Author

@slfan1989 slfan1989 May 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I will check yetus checkstyle report.

List<NodeReport> nodeReports = new ArrayList<>();
for (GetClusterNodesResponse response : responses) {
if (response != null && response.getNodeReports() != null) {
nodeReports.addAll(response.getNodeReports());
}
}
clusterNodesResponse.setNodeReports(nodeReports);
return clusterNodesResponse;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
Expand Down Expand Up @@ -641,4 +643,18 @@ public void testGetApplicationsApplicationStateNotExists() throws Exception{
Assert.assertNotNull(responseGet);
Assert.assertTrue(responseGet.getApplicationList().isEmpty());
}

@Test
public void testGetClusterNodesRequest() throws Exception {
LOG.info("Test FederationClientInterceptor : Get Cluster Nodeds request");
// null request
LambdaTestUtils.intercept(YarnException.class,
"Missing getClusterNodes request.", () -> interceptor.getClusterNodes(null));

// normal request.
GetClusterNodesResponse response =
interceptor.getClusterNodes(GetClusterNodesRequest.newInstance());
Assert.assertEquals(subClusters.size(),
response.getNodeReports().size());
}
}