Skip to content

Commit 6103dc2

Browse files
committed
Implement gang scheduling and add volcano provider example
Signed-off-by: JesseStutler <[email protected]>
1 parent 4cd9e42 commit 6103dc2

File tree

24 files changed

+917
-55
lines changed

24 files changed

+917
-55
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Read the [documentation](https://lws.sigs.k8s.io/docs/) or watch the LWS-related
2222
- **Group of Pods as a unit:** Supports a tightly managed group of pods that represent a “super pod”
2323
- **Unique pod identity:** Each pod in the group has a unique index from 0 to n-1.
2424
- **Parallel creation:** Pods in the group will have the same lifecycle and be created in parallel.
25+
- **Gang Scheduling:** The group of pods can be scheduled in an all-or-nothing manner.
2526
- **Dual-template, one for leader and one for the workers:** A replica is a group of a single leader and a set of workers, and allow to specify a template for the workers and optionally use a second one for the leader pod.
2627
- **Multiple groups with identical specifications:** Supports creating multiple “replicas” of the above mentioned group. Each group is a single unit for rolling update, scaling, and maps to a single exclusive topology for placement.
2728
- **A scale subresource:** A scale endpoint is exposed for HPA to dynamically scale the number replicas (aka number of groups)

api/config/v1alpha1/configuration_types.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ type ControllerManager struct {
5555
// Health contains the controller health configuration
5656
// +optional
5757
Health ControllerHealth `json:"health,omitempty"`
58+
59+
// EnableGangScheduling controls whether to enable gang-scheduling.
60+
// +optional
61+
EnableGangScheduling bool `json:"enableGangScheduling,omitempty"`
62+
63+
// SchedulerProvider is the name of the scheduler that provides gang-scheduling capabilities.
64+
// This is required if EnableGangScheduling is true.
65+
// +optional
66+
SchedulerProvider string `json:"schedulerProvider,omitempty"`
5867
}
5968

6069
// ControllerWebhook defines the webhook server for the controller.

charts/lws/README.md

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,24 +65,26 @@ for more information on installing LWS with metrics using our Helm chart.
6565

6666
The following table lists the configurable parameters of the LWS chart and their default values.
6767

68-
| Parameter | Description | Default |
69-
|---------------------------------------------|------------------------------------------------|--------------------------------------|
70-
| `nameOverride` | nameOverride | `` |
71-
| `fullnameOverride` | fullnameOverride | `` |
72-
| `enablePrometheus` | enable Prometheus | `false` |
73-
| `enableCertManager` | enable CertManager | `false` |
74-
| `imagePullSecrets` | Image pull secrets | `[]` |
75-
| `image.manager.repository` | Repository for manager image | `us-central1-docker.pkg.dev/k8s-staging-images/lws` |
76-
| `image.manager.tag` | Tag for manager image | `main` |
77-
| `image.manager.pullPolicy` | Pull policy for manager image | `IfNotPresent` |
78-
| `podAnnotations` | Annotations for pods | `{}` |
79-
| `podSecurityContext.runAsNonRoot` | Run pod as non-root user | `true` |
80-
| `securityContext.allowPrivilegeEscalation` | Allow privilege escalation in security context | `false` |
81-
| `securityContext.capabilities.drop` | Drop all capabilities in security context | `["ALL"]` |
82-
| `service.type` | Type of lws controller service | `ClusterIP` |
83-
| `service.port` | Lws controller service port | `9443` |
84-
| `resources.requests.cpu` | CPU request for resources | `1` |
85-
| `resources.requests.memory` | Memory request for resources | `1Gi` |
86-
| `nodeSelector` | Node selector | `{}` |
87-
| `tolerations` | Tolerations | `{}` |
88-
| `affinity` | Affinity | `{}` |
68+
| Parameter | Description | Default |
69+
|--------------------------------------------|------------------------------------------------|---------------------------------------------------|
70+
| `nameOverride` | nameOverride | `` |
71+
| `fullnameOverride` | fullnameOverride | `` |
72+
| `enablePrometheus` | enable Prometheus | `false` |
73+
| `enableCertManager` | enable CertManager | `false` |
74+
| `imagePullSecrets` | Image pull secrets | `[]` |
75+
| `image.manager.repository` | Repository for manager image | `us-central1-docker.pkg.dev/k8s-staging-images/lws` |
76+
| `image.manager.tag` | Tag for manager image | `main` |
77+
| `image.manager.pullPolicy` | Pull policy for manager image | `IfNotPresent` |
78+
| `podAnnotations` | Annotations for pods | `{}` |
79+
| `podSecurityContext.runAsNonRoot` | Run pod as non-root user | `true` |
80+
| `securityContext.allowPrivilegeEscalation` | Allow privilege escalation in security context | `false` |
81+
| `securityContext.capabilities.drop` | Drop all capabilities in security context | `["ALL"]` |
82+
| `service.type` | Type of lws controller service | `ClusterIP` |
83+
| `service.port` | Lws controller service port | `9443` |
84+
| `resources.requests.cpu` | CPU request for resources | `1` |
85+
| `resources.requests.memory` | Memory request for resources | `1Gi` |
86+
| `nodeSelector` | Node selector | `{}` |
87+
| `tolerations` | Tolerations | `{}` |
88+
| `affinity` | Affinity | `{}` |
89+
| `gangScheduling.enable` | enable gang scheduling capabilities | `false` |
90+
| `gangScheduling.schedulerProvider` | Scheduler provider that provides gang scheduling capabilities | `` |

charts/lws/templates/manager/deployment.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ spec:
3737
- args:
3838
- --config=/controller_manager_config.yaml
3939
- --zap-log-level=2
40+
{{- if .Values.gangScheduling.enable }}
41+
- --enable-gang-scheduling={{ .Values.gangScheduling.enable }}
42+
- --scheduler-provider={{ .Values.gangScheduling.schedulerProvider }}
43+
{{- end }}
4044
command:
4145
- /manager
4246
name: manager

charts/lws/templates/rbac/clusterrole.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@ rules:
128128
- get
129129
- patch
130130
- update
131+
{{- if and .Values.gangScheduling.enable (eq .Values.gangScheduling.schedulerProvider "volcano") }}
132+
- apiGroups:
133+
- scheduling.volcano.sh
134+
resources:
135+
- podgroups
136+
verbs:
137+
- create
138+
- get
139+
- list
140+
- watch
141+
{{- end }}
131142
---
132143
apiVersion: rbac.authorization.k8s.io/v1
133144
kind: ClusterRole

charts/lws/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,6 @@ resources:
4848
nodeSelector: {}
4949
tolerations: []
5050
affinity: {}
51+
gangScheduling:
52+
enable: false
53+
schedulerProvider: ""

cmd/main.go

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ import (
3434
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3535
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
3636
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
37+
volcanov1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
38+
3739
configapi "sigs.k8s.io/lws/api/config/v1alpha1"
3840
leaderworkersetv1 "sigs.k8s.io/lws/api/leaderworkerset/v1"
3941
"sigs.k8s.io/lws/pkg/cert"
@@ -57,6 +59,7 @@ func init() {
5759

5860
utilruntime.Must(leaderworkersetv1.AddToScheme(scheme))
5961
utilruntime.Must(configapi.AddToScheme(scheme))
62+
utilruntime.Must(volcanov1beta1.AddToScheme(scheme))
6063
//+kubebuilder:scaffold:scheme
6164
}
6265

@@ -75,6 +78,10 @@ func main() {
7578
leaderElectResourceLock string
7679
leaderElectionID string
7780
configFile string
81+
82+
// gang scheduling related flags
83+
enableGangScheduling bool
84+
schedulerProvider string
7885
)
7986

8087
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8443", "DEPRECATED(please pass configuration file via --config flag): The address the metric endpoint binds to.")
@@ -103,6 +110,8 @@ func main() {
103110
"The controller will load its initial configuration from this file. "+
104111
"Command-line flags will override any configurations set in this file. "+
105112
"Omit this flag to use the default configuration values.")
113+
flag.BoolVar(&enableGangScheduling, "enable-gang-scheduling", false, "Enable gang-scheduling for a group of lws pods [ 1 leader pod + (size-1) worker pods].")
114+
flag.StringVar(&schedulerProvider, "scheduler-provider", "", "The name of the scheduler that provides gang-scheduling capabilities.")
106115

107116
opts := zap.Options{
108117
Development: true,
@@ -116,7 +125,7 @@ func main() {
116125

117126
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
118127

119-
options, cfg, err := apply(configFile, probeAddr, enableLeaderElection, leaderElectLeaseDuration, leaderElectRenewDeadline, leaderElectRetryPeriod, leaderElectResourceLock, leaderElectionID, metricsAddr)
128+
options, cfg, err := apply(configFile, probeAddr, enableLeaderElection, leaderElectLeaseDuration, leaderElectRenewDeadline, leaderElectRetryPeriod, leaderElectResourceLock, leaderElectionID, metricsAddr, enableGangScheduling, schedulerProvider)
120129
if err != nil {
121130
setupLog.Error(err, "unable to load the configuration")
122131
os.Exit(1)
@@ -160,7 +169,7 @@ func main() {
160169
// Cert won't be ready until manager starts, so start a goroutine here which
161170
// will block until the cert is ready before setting up the controllers.
162171
// Controllers who register after manager starts will start directly.
163-
go setupControllers(mgr, certsReady)
172+
go setupControllers(mgr, certsReady, cfg)
164173

165174
setupHealthzAndReadyzCheck(mgr)
166175
setupLog.Info("starting manager")
@@ -171,7 +180,7 @@ func main() {
171180
}
172181

173182
}
174-
func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
183+
func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, cfg configapi.Configuration) {
175184
// The controllers won't work until the webhooks are operating,
176185
// and the webhook won't work until the certs are all in places.
177186
setupLog.Info("waiting for the cert generation to complete")
@@ -187,7 +196,11 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
187196
os.Exit(1)
188197
}
189198
// Set up pod reconciler.
190-
podController := controllers.NewPodReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("leaderworkerset"))
199+
podController, err := controllers.NewPodReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("leaderworkerset"), cfg)
200+
if err != nil {
201+
setupLog.Error(err, "unable to create controller", "controller", "Pod")
202+
os.Exit(1)
203+
}
191204
if err := podController.SetupWithManager(mgr); err != nil {
192205
setupLog.Error(err, "unable to create controller", "controller", "Pod")
193206
os.Exit(1)
@@ -197,7 +210,7 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
197210
setupLog.Error(err, "unable to create leaderworkerset webhook", "webhook", "LeaderWorkerSet")
198211
os.Exit(1)
199212
}
200-
if err := webhooks.SetupPodWebhook(mgr); err != nil {
213+
if err := webhooks.SetupPodWebhook(mgr, cfg); err != nil {
201214
setupLog.Error(err, "unable to create pod webhook", "webhook", "LeaderWorkerSet")
202215
os.Exit(1)
203216
}
@@ -225,7 +238,9 @@ func apply(configFile string,
225238
leaderElectRetryPeriod time.Duration,
226239
leaderElectResourceLock,
227240
leaderElectionID string,
228-
metricsAddr string) (ctrl.Options, configapi.Configuration, error) {
241+
metricsAddr string,
242+
enableGangScheduling bool,
243+
schedulerProvider string) (ctrl.Options, configapi.Configuration, error) {
229244
namespace := utils.GetOperatorNamespace()
230245

231246
options, cfg, err := config.Load(scheme, configFile)
@@ -258,6 +273,17 @@ func apply(configFile string,
258273
if flagsSet["leader-elect-resource-name"] {
259274
options.LeaderElectionID = leaderElectionID
260275
}
276+
if flagsSet["enable-gang-scheduling"] {
277+
cfg.EnableGangScheduling = enableGangScheduling
278+
}
279+
if flagsSet["scheduler-provider"] {
280+
cfg.SchedulerProvider = schedulerProvider
281+
}
282+
283+
// Validate the final configuration after command-line overrides
284+
if err := config.ValidateConfiguration(&cfg); err != nil {
285+
return options, cfg, err
286+
}
261287

262288
// Disabling http/2 to prevent being vulnerable to the HTTP/2 Stream Cancellation and
263289
// Rapid Reset CVEs. For more information see:

0 commit comments

Comments
 (0)