Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyException;
Expand Down Expand Up @@ -136,7 +137,7 @@ public SubClusterId getHomeSubcluster(

if (appSubmissionContext == null) {
throw new FederationPolicyException(
"The ApplicationSubmissionContext " + "cannot be null.");
"The ApplicationSubmissionContext cannot be null.");
}

String queue = appSubmissionContext.getQueue();
Expand All @@ -148,51 +149,7 @@ public SubClusterId getHomeSubcluster(
queue = YarnConfiguration.DEFAULT_QUEUE_NAME;
}

// the facade might cache this request, based on its parameterization
SubClusterPolicyConfiguration configuration = null;

try {
configuration = federationFacade.getPolicyConfiguration(queue);
} catch (YarnException e) {
String errMsg = "There is no policy configured for the queue: " + queue
+ ", falling back to defaults.";
LOG.warn(errMsg, e);
}

// If there is no policy configured for this queue, fallback to the baseline
// policy that is configured either in the store or via XML config (and
// cached)
if (configuration == null) {
LOG.warn("There is no policies configured for queue: " + queue + " we"
+ " fallback to default policy for: "
+ YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY);

queue = YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY;
try {
configuration = federationFacade.getPolicyConfiguration(queue);
} catch (YarnException e) {
String errMsg = "Cannot retrieve policy configured for the queue: "
+ queue + ", falling back to defaults.";
LOG.warn(errMsg, e);

}
}

// the fallback is not configure via store, but via XML, using
// previously loaded configuration.
if (configuration == null) {
configuration =
cachedConfs.get(YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY);
}

// if the configuration has changed since last loaded, reinit the policy
// based on current configuration
if (!cachedConfs.containsKey(queue)
|| !cachedConfs.get(queue).equals(configuration)) {
singlePolicyReinit(policyMap, cachedConfs, queue, configuration);
}

FederationRouterPolicy policy = policyMap.get(queue);
FederationRouterPolicy policy = getFederationRouterPolicy(cachedConfs, policyMap, queue);
if (policy == null) {
// this should never happen, as the two maps are updated together
throw new FederationPolicyException("No FederationRouterPolicy found "
Expand Down Expand Up @@ -262,4 +219,92 @@ public synchronized void reset() {

}

/**
 * Routes a reservation to its "home" sub-cluster using the router policy
 * resolved for the reservation's queue. Policy resolution (including the
 * fallback to the default policy and the policy init/reinit on configuration
 * changes) is delegated to {@code getFederationRouterPolicy}.
 *
 * @param request the reservation to route.
 *
 * @return the id of the subcluster that will be the "home" for this
 *         reservation.
 *
 * @throws YarnException if the request is null, if there are issues
 *           initializing policies, or no valid sub-cluster id could be found
 *           for this reservation.
 */
public SubClusterId getReservationHomeSubCluster(
    ReservationSubmissionRequest request) throws YarnException {

  // the maps are concurrent, but we need to protect from reset()
  // reinitialization mid-execution by creating a new reference local to this
  // method.
  Map<String, SubClusterPolicyConfiguration> cachedConfs = globalConfMap;
  Map<String, FederationRouterPolicy> policyMap = globalPolicyMap;

  if (request == null) {
    throw new FederationPolicyException(
        "The ReservationSubmissionRequest cannot be null.");
  }

  // Assume the default queue when none is specified, as is done for
  // application submissions, so that a null key never reaches the policy
  // and configuration lookups below.
  String queue = request.getQueue();
  if (queue == null) {
    queue = YarnConfiguration.DEFAULT_QUEUE_NAME;
  }

  FederationRouterPolicy policy =
      getFederationRouterPolicy(cachedConfs, policyMap, queue);

  if (policy == null) {
    // this should never happen, as the two maps are updated together
    throw new FederationPolicyException("No FederationRouterPolicy found "
        + "for queue: " + queue + " (while routing "
        + "reservation: " + request.getReservationId() + ") "
        + "and no default specified.");
  }

  return policy.getReservationHomeSubcluster(request);
}

private FederationRouterPolicy getFederationRouterPolicy(
Map<String, SubClusterPolicyConfiguration> cachedConfiguration,
Map<String, FederationRouterPolicy> policyMap, String queue)
throws FederationPolicyInitializationException {

// the facade might cache this request, based on its parameterization
SubClusterPolicyConfiguration configuration = null;
String copyQueue = queue;

try {
configuration = federationFacade.getPolicyConfiguration(copyQueue);
} catch (YarnException e) {
LOG.warn("There is no policy configured for the queue: {}, " +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can fit the string into a single line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your help reviewing the code, I will modify the code.

"falling back to defaults.", copyQueue, e);
}

// If there is no policy configured for this queue, fallback to the baseline
// policy that is configured either in the store or via XML config (and
// cached)
if (configuration == null) {
final String policyKey = YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY;
LOG.warn("There is no policies configured for queue: {} " +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can rearrange so the string is a single line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will fix it.

"we fallback to default policy for: {}. ", copyQueue, policyKey);
copyQueue = YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY;
try {
configuration = federationFacade.getPolicyConfiguration(copyQueue);
} catch (YarnException e) {
LOG.warn("Cannot retrieve policy configured for the queue: {}, " +
"falling back to defaults.", copyQueue, e);
}
}

// the fallback is not configure via store, but via XML, using
// previously loaded configuration.
if (configuration == null) {
configuration = cachedConfiguration.get(YarnConfiguration.DEFAULT_FEDERATION_POLICY_KEY);
}

// if the configuration has changed since last loaded, reinit the policy
// based on current configuration
if (!cachedConfiguration.containsKey(copyQueue)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix indentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will fix it.

|| !cachedConfiguration.get(copyQueue).equals(configuration)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would extract to make it more readable.

singlePolicyReinit(policyMap, cachedConfiguration, copyQueue, configuration);
}

return policyMap.get(copyQueue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,23 @@

package org.apache.hadoop.yarn.server.federation.policies.router;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.AbstractConfigurableFederationPolicy;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyUtils;
import org.apache.hadoop.yarn.server.federation.policies.dao.WeightedPolicyInfo;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyException;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyInitializationException;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterIdInfo;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;

/**
* Base abstract class for {@link FederationRouterPolicy} implementations, that
Expand Down Expand Up @@ -63,4 +71,108 @@ public void validate(ApplicationSubmissionContext appSubmissionContext)
}
}

/**
* Selects one sub-cluster out of an already filtered set of candidates.
* This method is implemented by the specific policy, and it is used to route
* both reservations and applications among a given set of sub-clusters.
*
* @param queue the queue for this application/reservation
* @param preSelectSubClusters the pre-filtered sub-clusters to choose from,
*          as a map of sub-cluster ids to their info
* @return the chosen sub-cluster
*
* @throws YarnException if the policy fails to choose a sub-cluster
*/
protected abstract SubClusterId chooseSubCluster(String queue,
Map<SubClusterId, SubClusterInfo> preSelectSubClusters) throws YarnException;

/**
* Pre-filters the candidate sub-clusters based on the reservationId.
* The current implementation performs no filtering and returns the given
* map as-is; limiting the scope to the sub-cluster a reservation is mapped
* to is still a TODO (see below).
*
* @param reservationId the globally unique identifier for a reservation;
*          not used by the current implementation.
* @param activeSubClusters the map of ids to info for all active subclusters.
* @return the map of sub-clusters that remain eligible; currently the very
*         same map instance that was passed in
* @throws YarnException declared for future filtering logic; the current
*           body does not throw it
*/
protected Map<SubClusterId, SubClusterInfo> prefilterSubClusters(
ReservationId reservationId, Map<SubClusterId, SubClusterInfo> activeSubClusters)
throws YarnException {

// if a reservation exists limit scope to the sub-cluster this
// reservation is mapped to
// TODO: Implemented in YARN-11236
// until then every active sub-cluster remains a candidate
return activeSubClusters;
}

/**
* Simply picks from alphabetically-sorted active subclusters based on the
* hash of quey name. Jobs of the same queue will all be routed to the same
* sub-cluster, as far as the number of active sub-cluster and their names
* remain the same.
*
* @param appContext the {@link ApplicationSubmissionContext} that
* has to be routed to an appropriate subCluster for execution.
*
* @param blackLists the list of subClusters as identified by
* {@link SubClusterId} to blackList from the selection of the home
* subCluster.
*
* @return a hash-based chosen {@link SubClusterId} that will be the "home"
* for this application.
*
* @throws YarnException if there are no active subclusters.
*/
@Override
public SubClusterId getHomeSubcluster(ApplicationSubmissionContext appContext,
List<SubClusterId> blackLists) throws YarnException {

// null checks and default-queue behavior
validate(appContext);

// apply filtering based on reservation location and active sub-clusters
Map<SubClusterId, SubClusterInfo> filteredSubClusters = prefilterSubClusters(
appContext.getReservationID(), getActiveSubclusters());

FederationPolicyUtils.validateSubClusterAvailability(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have validateSubClusterAvailability taking Collection so we don't need temp data structures?

new ArrayList<>(filteredSubClusters.keySet()), blackLists);

// remove black SubCluster
if (blackLists != null) {
blackLists.forEach(filteredSubClusters::remove);
}

// pick the chosen subCluster from the active ones
return chooseSubCluster(appContext.getQueue(), filteredSubClusters);
}

/**
 * Template for routing a reservation: defaults the queue on the request when
 * it is missing, pre-filters the active sub-clusters and then lets the
 * concrete policy pick the "home" sub-cluster through
 * {@code chooseSubCluster}.
 *
 * Note that a request without a queue is modified in place: its queue is
 * set to {@link YarnConfiguration#DEFAULT_QUEUE_NAME}.
 *
 * @param request the reservation to route.
 *
 * @return the id of the subcluster that will be the "home" for this
 *         reservation.
 *
 * @throws YarnException if the request is null or the policy cannot choose a
 *           sub-cluster for this reservation.
 */
@Override
public SubClusterId getReservationHomeSubcluster(ReservationSubmissionRequest request)
    throws YarnException {
  if (request == null) {
    throw new FederationPolicyException("The ReservationSubmissionRequest cannot be null.");
  }

  // fall back to the default queue when the request does not name one
  if (request.getQueue() == null) {
    request.setQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
  }

  // restrict the active sub-clusters according to the reservation
  final ReservationId reservationId = request.getReservationId();
  final Map<SubClusterId, SubClusterInfo> candidates =
      prefilterSubClusters(reservationId, getActiveSubclusters());

  // delegate the final pick to the concrete policy
  final String queue = request.getQueue();
  return chooseSubCluster(queue, candidates);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.util.List;

import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.ConfigurableFederationPolicy;
Expand Down Expand Up @@ -49,4 +50,16 @@ public interface FederationRouterPolicy extends ConfigurableFederationPolicy {
SubClusterId getHomeSubcluster(
ApplicationSubmissionContext appSubmissionContext,
List<SubClusterId> blackListSubClusters) throws YarnException;

/**
* Determines the sub-cluster where a ReservationSubmissionRequest should be
* sent to.
*
* @param request the original reservation submission request
* @return the id of the sub-cluster chosen for the reservation
*
* @throws YarnException if the policy fails to choose a sub-cluster
*/
SubClusterId getReservationHomeSubcluster(
ReservationSubmissionRequest request) throws YarnException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@
import java.util.List;
import java.util.Map;

import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyInitializationContext;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyInitializationContextValidator;
import org.apache.hadoop.yarn.server.federation.policies.FederationPolicyUtils;
import org.apache.hadoop.yarn.server.federation.policies.exceptions.FederationPolicyInitializationException;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
Expand All @@ -50,53 +48,12 @@ public void reinitialize(
setPolicyContext(federationPolicyContext);
}

/**
* Simply picks from alphabetically-sorted active subclusters based on the
* hash of queue name. Jobs of the same queue will all be routed to the same
* sub-cluster, as far as the number of active sub-cluster and their names
* remain the same.
*
* @param appSubmissionContext the {@link ApplicationSubmissionContext} that
* has to be routed to an appropriate subCluster for execution.
*
* @param blackListSubClusters the list of subClusters as identified by
* {@link SubClusterId} to blackList from the selection of the home
* subCluster.
*
* @return a hash-based chosen {@link SubClusterId} that will be the "home"
* for this application.
*
* @throws YarnException if there are no active subclusters.
*/
@Override
public SubClusterId getHomeSubcluster(
ApplicationSubmissionContext appSubmissionContext,
List<SubClusterId> blackListSubClusters) throws YarnException {

// throws if no active subclusters available
Map<SubClusterId, SubClusterInfo> activeSubclusters =
getActiveSubclusters();

FederationPolicyUtils.validateSubClusterAvailability(
new ArrayList<SubClusterId>(activeSubclusters.keySet()),
blackListSubClusters);

if (blackListSubClusters != null) {

// Remove from the active SubClusters from StateStore the blacklisted ones
for (SubClusterId scId : blackListSubClusters) {
activeSubclusters.remove(scId);
}
}

validate(appSubmissionContext);

int chosenPosition = Math.abs(
appSubmissionContext.getQueue().hashCode() % activeSubclusters.size());

List<SubClusterId> list = new ArrayList<>(activeSubclusters.keySet());
protected SubClusterId chooseSubCluster(String queue,
    Map<SubClusterId, SubClusterInfo> preSelectSubClusters) throws YarnException {
  // Hash-based pick: the queue name's hash, reduced modulo the number of
  // candidates, indexes into the candidate ids in their sorted order. The
  // same queue therefore maps to the same sub-cluster as long as the set of
  // candidates does not change.
  final int candidateCount = preSelectSubClusters.size();
  final int index = Math.abs(queue.hashCode() % candidateCount);

  final List<SubClusterId> sortedIds = new ArrayList<>(preSelectSubClusters.keySet());
  Collections.sort(sortedIds);

  return sortedIds.get(index);
}

}
Loading