Skip to content

Commit 0976f11

Browse files
authored
Merge pull request #3 from cwiklik/set-targetClusterName-in-spec
added support for setting targetClusterName in status
2 parents 85ab98b + fd0d436 commit 0976f11

File tree

10 files changed

+56
-10
lines changed

10 files changed

+56
-10
lines changed

cmd/kar-controllers/app/options/options.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type ServerOption struct {
5656
QuotaRestURL string
5757
HealthProbeListenAddr string
5858
DispatchResourceReservationTimeout int64
59+
ExternalDispatch bool // if true, will use external plugin to dispatch workloads
5960
}
6061

6162
// NewServerOption creates a new CMServer with a default config.
@@ -83,6 +84,8 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) {
8384
fs.IntVar(&s.SecurePort, "secure-port", 6443, "The port on which to serve secured, authenticated access for metrics.")
8485
fs.StringVar(&s.HealthProbeListenAddr, "healthProbeListenAddr", ":8081", "Listen address for health probes. Defaults to ':8081'")
8586
fs.Int64Var(&s.DispatchResourceReservationTimeout, "dispatchResourceReservationTimeout", s.DispatchResourceReservationTimeout, "Resource reservation timeout for pods to be created once AppWrapper is dispatched, in millisecond. Defaults to '300000', 5 minutes")
87+
fs.BoolVar(&s.ExternalDispatch,"externalDispatch", s.ExternalDispatch,"Use external workload dispatch plugin. Default is false.")
88+
8689
flag.Parse()
8790
klog.V(4).Infof("[AddFlags] Controller configuration: %#v", s)
8891
}
@@ -147,6 +150,12 @@ func (s *ServerOption) loadDefaultsFromEnvVars() {
147150
s.DispatchResourceReservationTimeout = to
148151
}
149152
}
153+
externalDispatch, envVarExists := os.LookupEnv("EXTERNAL_DISPATCH")
154+
s.ExternalDispatch = false
155+
if envVarExists && strings.EqualFold(externalDispatch, "true") {
156+
s.ExternalDispatch = true
157+
}
158+
150159
}
151160

152161
func (s *ServerOption) CheckOptionOrDie() {

deployment/mcad-controller/templates/configmap.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ metadata:
77
data:
88
QUOTA_ENABLED: {{ .Values.configMap.quotaEnabled }}
99
DISPATCHER_MODE: {{ .Values.configMap.dispatcherMode }}
10+
EXTERNAL_DISPATCH: {{.Values.configMap.externalDispatch }}
1011
{{ if .Values.configMap.agentConfigs }}DISPATCHER_AGENT_CONFIGS: {{ .Values.configMap.agentConfigs }}{{ end }}
1112
PREEMPTION: {{ .Values.configMap.preemptionEnabled }}
1213
{{ if .Values.configMap.quotaRestUrl }}QUOTA_REST_URL: {{ .Values.configMap.quotaRestUrl }}{{ end }}

deployment/mcad-controller/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ configMap:
4747
quotaEnabled: '"false"'
4848
multiCluster: false
4949
dispatcherMode: '"false"'
50+
externalDispatch: '"false"'
5051
preemptionEnabled: '"false"'
5152
agentConfigs: ""
5253
quotaRestUrl: ""

deployment/mcad-operator/bundle/manifests/mcad-operator.clusterserviceversion.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ kind: ClusterServiceVersion
33
metadata:
44
annotations:
55
alm-examples: >-
6-
[{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}]
6+
[{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","externalDispatch":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}]
77
capabilities: Basic Install
88
description: A Kubernetes Native Holistic Lifecycle Resource Manager for Applications
99
name: mcad-operator.v0.1.9

deployment/mcad-operator/deploy/crds/mcad.ibm.com_v1beta1_mcadhelmconfig_cr.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ spec:
88
configMap:
99
agentConfigs: null
1010
dispatcherMode: "false"
11+
externalDispatch: "false"
1112
name: null
1213
deploymentName: xqueuejob-controller
1314
image:

deployment/mcad-operator/deploy/olm-catalog/mcad-operator/manifests/mcad-operator.clusterserviceversion.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ kind: ClusterServiceVersion
33
metadata:
44
annotations:
55
alm-examples: >-
6-
[{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}]
6+
[{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","externalDispatch":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}]
77
capabilities: Basic Install
88
description: A Kubernetes Native Holistic Lifecycle Resource Manager for Applications
99
name: mcad-operator.v0.1.9

deployment/mcad-operator/helm-charts/mcad-controller/templates/configmap.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ metadata:
66
namespace: kube-system
77
data:
88
DISPATCHER_MODE: {{ .Values.configMap.dispatcherMode }}
9+
EXTERNAL_DISPATCH: {{.Values.configMap.externalDispatch }}
910
DISPATCHER_AGENT_CONFIGS: {{ .Values.configMap.agentConfigs }}
1011
#{{ end }}

deployment/mcad-operator/helm-charts/mcad-controller/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ configMap:
3232
name:
3333
multiCluster: false
3434
dispatcherMode: "false"
35+
externalDispatch: "false"
3536
agentConfigs:
3637

3738
volumes:

doc/deploy/deployment.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ The following table lists the configurable parameters of the helm chart and thei
150150
| ----------------------- | ------------------------------------ | ------------- | ------------------------------------------------ |
151151
| `configMap.agentConfigs` | *For Every Agent Cluster separated by commas(,):* Name of *agent* config file _:_ Set the dispatching mode for the _*Agent Cluster*_. Note: Only the dispatching mode `uncordon`, which indicates that the _MCAD_ controller is allowed to dispatch jobs to the _*Agent Cluster*_, is supported. | <_No default for agent config file_>:`uncordon` | `agent101config:uncordon,agent110config:uncordon` |
152152
| `configMap.dispatcherMode` | Whether the _MCAD_ Controller should be launched in Dispatcher mode or not | `false` | `true` |
153+
| `configMap.externalDispatch` | Whether the _MCAD_ Controller should use an external plugin to dispatch workloads or not | `false` | `true` |
153154
| `configMap.name` | Name of the Kubernetes *ConfigMap* resource to configure the _MCAD_ Controller | | `mcad-deployer` |
154155
| `deploymentName` | Name of _MCAD_ Controller Deployment Object | `mcad-controller` | `my-mcad-controller` |
155156
| `image.pullPolicy` | Policy that dictates when the specified image is pulled | `Always` | `Never` |

pkg/controller/queuejob/queuejob_controller_ex.go

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"fmt"
3535
"math"
3636
"math/rand"
37+
"path"
3738
"reflect"
3839
"runtime/debug"
3940
"sort"
@@ -1011,6 +1012,31 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
10111012
}
10121013

10131014
func (qjm *XController) chooseAgent(qj *arbv1.AppWrapper) string {
1015+
1016+
if qjm.serverOption.ExternalDispatch {
1017+
clusters := qj.Spec.SchedSpec.ClusterScheduling.Clusters
1018+
var agentId = ""
1019+
apath := path.Dir(qjm.agentList[0])
1020+
var agentIdList = make([]string, len(clusters))
1021+
clustersProvided := false // assume clusters not provided
1022+
for _, clusterRef := range clusters {
1023+
if clusterRef.Name != "" {
1024+
clustersProvided = true
1025+
agentIdList = append(agentIdList, apath+"/"+clusterRef.Name )
1026+
}
1027+
}
1028+
// target clusters not defined by the submitter of workload. Just pick a target
1029+
// from a known list of clusters provided in serverOption.AgentConfigs
1030+
if !clustersProvided {
1031+
agentId = qjm.agentList[rand.Int()%len(qjm.agentList)]
1032+
klog.V(1).Infof("ClusterId %s is chosen randomly from a list provided by mcad\n", agentId)
1033+
} else {
1034+
// choose target clusterId at random
1035+
agentId = agentIdList[rand.Int()%len(agentIdList)]
1036+
klog.V(1).Infof("ClusterId %s is chosen randomly from a list provided in Spec.SchedSpec.ClusterScheduling.Clusters: %s\n", agentId, agentIdList)
1037+
}
1038+
return agentId;
1039+
}
10141040

10151041
qjAggrResources := qjm.GetAggregatedResources(qj)
10161042
klog.V(2).Infof("[chooseAgent] Aggregated Resources of XQJ %s: %v\n", qj.Name, qjAggrResources)
@@ -1922,7 +1948,6 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool
19221948
defer func() {
19231949
klog.V(10).Infof("[worker-manageQJ] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status)
19241950
}()
1925-
19261951
if !cc.isDispatcher { // Agent Mode
19271952

19281953
if qj.DeletionTimestamp != nil {
@@ -2215,18 +2240,21 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool
22152240
current_time := time.Now()
22162241
klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead Before Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time))
22172242
klog.V(10).Infof("[TTime] %s, %s: WorkerBeforeDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time))
2218-
}
2219-
2243+
}
22202244
queuejobKey, _ := GetQueueJobKey(qj)
2221-
// agentId:=cc.dispatchMap[queuejobKey]
2222-
// if agentId!=nil {
22232245
if agentId, ok := cc.dispatchMap[queuejobKey]; ok {
22242246
klog.V(10).Infof("[Dispatcher Controller] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId)
2225-
cc.agentMap[agentId].CreateJob(qj)
2247+
if cc.serverOption.ExternalDispatch {
2248+
values := strings.Split(agentId,"/")
2249+
klog.V(10).Infof("[Dispatcher Controller] Dispatching AppWrapper %s to Agent ID: %s Through External Dispatcher.", qj.Name, values[len(values)-1])
2250+
qj.Status.TargetClusterName = values[len(values)-1] //agentId
2251+
} else {
2252+
cc.agentMap[agentId].CreateJob(qj)
2253+
}
22262254
qj.Status.IsDispatched = true
22272255
} else {
22282256
klog.Errorf("[Dispatcher Controller] AppWrapper %s not found in dispatcher mapping.", qj.Name)
2229-
}
2257+
}
22302258
if klog.V(10).Enabled() {
22312259
current_time := time.Now()
22322260
klog.V(10).Infof("[Dispatcher Controller] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time))
@@ -2277,7 +2305,10 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error {
22772305
if appwrapper.Status.IsDispatched {
22782306
queuejobKey, _ := GetQueueJobKey(appwrapper)
22792307
if obj, ok := cc.dispatchMap[queuejobKey]; ok {
2280-
cc.agentMap[obj].DeleteJob(appwrapper)
2308+
if !cc.serverOption.ExternalDispatch {
2309+
cc.agentMap[obj].DeleteJob(appwrapper)
2310+
}
2311+
delete(cc.dispatchMap,queuejobKey)
22812312
}
22822313
appwrapper.Status.IsDispatched = false
22832314
}

0 commit comments

Comments
 (0)