Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions charts/agent-stack-k8s/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,72 @@
"default": 1000000,
"minimum": 1,
"title": "Sets the maximum number of Jobs the controller will hold in the work queue."
},
"resource-classes": {
"type": "object",
"default": null,
"title": "Resource classes",
"description": "Define reusable resource configurations that can be applied to jobs based on the resource_class agent tag.",
"additionalProperties": {
"type": "object",
"properties": {
"resource": {
"$ref": "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/v1.35.0/_definitions.json#/definitions/io.k8s.api.core.v1.ResourceRequirements"
},
"nodeSelector": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
},
"examples": [
{
"xs": {
"resource": {
"requests": {
"cpu": "100m",
"memory": "128Mi"
},
"limits": {
"cpu": "200m",
"memory": "256Mi"
}
}
},
"gpu": {
"resource": {
"requests": {
"nvidia.com/gpu": "1",
"cpu": "1000m",
"memory": "2Gi"
},
"limits": {
"nvidia.com/gpu": "1",
"cpu": "2000m",
"memory": "4Gi"
}
},
"nodeSelector": {
"accelerator": "nvidia-tesla-k80"
}
},
"spot": {
"nodeSelector": {
"node-type": "spot",
"instance-type": "large"
}
}
}
]
},
"default-resource-class-name": {
"type": "string",
"default": "",
"title": "Default resource class name",
"description": "The name of a resource class (defined in resource-classes) to apply by default to jobs that do not specify a resource_class agent tag.",
"examples": ["xs", "spot"]
}
},
"examples": [
Expand Down
9 changes: 9 additions & 0 deletions cmd/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,15 @@ func ParseAndValidateConfig(v *viper.Viper) (*config.Config, error) {
}
}

if cfg.DefaultResourceClassName != "" {
if cfg.ResourceClasses == nil {
return nil, fmt.Errorf("default-resource-class-name %q specified but no resource-classes defined", cfg.DefaultResourceClassName)
}
if _, exists := cfg.ResourceClasses[cfg.DefaultResourceClassName]; !exists {
return nil, fmt.Errorf("default-resource-class-name %q not found in resource-classes", cfg.DefaultResourceClassName)
}
}

if _, err := resource.ParseQuantity(cfg.ImageCheckContainerCPULimit); err != nil {
return nil, fmt.Errorf("invalid CPU resource limit defined: %s", cfg.ImageCheckContainerCPULimit)
}
Expand Down
56 changes: 56 additions & 0 deletions cmd/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestReadAndParseConfig(t *testing.T) {
},
},
},
DefaultResourceClassName: "small",

WorkspaceVolume: &corev1.Volume{
Name: "workspace-2-the-reckoning",
Expand Down Expand Up @@ -185,3 +186,58 @@ func TestReadAndParseConfig(t *testing.T) {
t.Errorf("parsed config diff (-got +want):\n%s", diff)
}
}

// TestParseAndValidateConfig_DefaultResourceClassValidation verifies that
// ParseAndValidateConfig returns an error when default-resource-class-name is
// set but cannot be resolved: either the name is missing from resource-classes,
// or no resource-classes are defined at all.
func TestParseAndValidateConfig_DefaultResourceClassValidation(t *testing.T) {
	type testCase struct {
		name    string
		config  map[string]any
		wantErr string
	}

	cases := []testCase{
		{
			name:    "default references non-existent resource class",
			wantErr: `default-resource-class-name "nonexistent" not found in resource-classes`,
			config: map[string]any{
				"agent-token-secret":          "test",
				"image":                       "test:latest",
				"job-active-deadline-seconds": 3600,
				"namespace":                   "default",
				"default-resource-class-name": "nonexistent",
				"resource-classes": map[string]any{
					"small": map[string]any{
						"resource": map[string]any{
							"requests": map[string]any{"cpu": "100m"},
						},
					},
				},
			},
		},
		{
			name:    "default specified but no resource classes defined",
			wantErr: `default-resource-class-name "small" specified but no resource-classes defined`,
			config: map[string]any{
				"agent-token-secret":          "test",
				"image":                       "test:latest",
				"job-active-deadline-seconds": 3600,
				"namespace":                   "default",
				"default-resource-class-name": "small",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Start from the flag defaults, then overlay the case's settings.
			cmd := &cobra.Command{}
			controller.AddConfigFlags(cmd)

			v, err := controller.ReadConfigFromFileArgsAndEnv(cmd, []string{})
			require.NoError(t, err)

			for key, value := range tc.config {
				v.Set(key, value)
			}

			_, err = controller.ParseAndValidateConfig(v)
			require.Error(t, err)
			require.Contains(t, err.Error(), tc.wantErr)
		})
	}
}
2 changes: 2 additions & 0 deletions examples/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ resource-classes:
memory: "512Mi"
hugepages-2Mi: "1Mi"

default-resource-class-name: "small"

queue: my-queue
tags:
- priority=high
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ type Config struct {
DefaultMetadata Metadata `json:"default-metadata" validate:"omitempty"`
AdditionalRedactedVars []string `json:"additional-redacted-vars" validate:"omitempty"`

ResourceClasses map[string]*ResourceClass `json:"resource-classes" validate:"omitempty"`
ResourceClasses map[string]*ResourceClass `json:"resource-classes" validate:"omitempty"`
DefaultResourceClassName string `json:"default-resource-class-name" validate:"omitempty"`

DefaultImagePullPolicy corev1.PullPolicy `json:"default-image-pull-policy" validate:"omitempty"`
DefaultImageCheckPullPolicy corev1.PullPolicy `json:"default-image-check-pull-policy" validate:"omitempty"`
Expand Down
1 change: 1 addition & 0 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ func Run(ctx context.Context, logger *slog.Logger, k8sClient kubernetes.Interfac
ImageCheckContainerCPULimit: cfg.ImageCheckContainerCPULimit,
ImageCheckContainerMemoryLimit: cfg.ImageCheckContainerMemoryLimit,
ResourceClasses: cfg.ResourceClasses,
DefaultResourceClassName: cfg.DefaultResourceClassName,
})

informerFactory, err := NewInformerFactory(k8sClient, cfg.Namespace, cfg.ID)
Expand Down
27 changes: 21 additions & 6 deletions internal/controller/scheduler/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,39 @@ import (
corev1 "k8s.io/api/core/v1"
)

// applyResourceClass applies a resource class to podSpec.
//
// The class name is taken from the "resource_class" agent tag when present;
// otherwise the configured default-resource-class-name is used. When neither
// is set the pod spec is left untouched and nil is returned.
//
// An error is returned when a class name was selected but no resource classes
// are configured, or when the selected name is not among the configured
// classes. The error text distinguishes a tag-selected class from the default
// so the source of the bad name is obvious.
func (w *worker) applyResourceClass(podSpec *corev1.PodSpec, tags map[string]string) error {
	resourceClassName, resourceClassTagExist := tags["resource_class"]

	if !resourceClassTagExist {
		if w.cfg.DefaultResourceClassName == "" {
			// No tag and no default: nothing to apply.
			return nil
		}
		resourceClassName = w.cfg.DefaultResourceClassName
	}

	if w.cfg.ResourceClasses == nil {
		if resourceClassTagExist {
			return fmt.Errorf("resource classes not configured but resource_class tag specified")
		}
		return fmt.Errorf("resource classes not configured but default-resource-class-name is set")
	}

	resourceClass, resourceClassFound := w.cfg.ResourceClasses[resourceClassName]
	if !resourceClassFound {
		if resourceClassTagExist {
			return fmt.Errorf("resource class not found: %s", resourceClassName)
		}
		return fmt.Errorf("default resource class not found: %s", resourceClassName)
	}

	resourceClass.Apply(podSpec)
	if resourceClassTagExist {
		w.logger.Debug("Applied resource class", "resource_class", resourceClassName)
	} else {
		w.logger.Debug("Applied default resource class", "resource_class", resourceClassName)
	}

	return nil
}
161 changes: 161 additions & 0 deletions internal/controller/scheduler/resource_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package scheduler

import (
"log/slog"
"os"
"testing"

"github.com/buildkite/agent-stack-k8s/v2/internal/controller/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)

// TestApplyResourceClass exercises worker.applyResourceClass across the
// combinations of the "resource_class" agent tag, the configured default
// resource class name, and the configured resource classes. Each case checks
// either the returned error text or the requests/limits that end up on the
// pod spec's first container.
func TestApplyResourceClass(t *testing.T) {
	logger := slog.New(slog.NewTextHandler(os.Stdout, nil))

	// newClass builds a resource class with CPU and memory requests and limits.
	newClass := func(reqCPU, reqMem, limCPU, limMem string) *config.ResourceClass {
		return &config.ResourceClass{
			Resource: &corev1.ResourceRequirements{
				Requests: corev1.ResourceList{
					corev1.ResourceCPU:    resource.MustParse(reqCPU),
					corev1.ResourceMemory: resource.MustParse(reqMem),
				},
				Limits: corev1.ResourceList{
					corev1.ResourceCPU:    resource.MustParse(limCPU),
					corev1.ResourceMemory: resource.MustParse(limMem),
				},
			},
		}
	}

	small := newClass("100m", "128Mi", "200m", "256Mi")
	large := newClass("1", "1Gi", "2", "2Gi")

	smallOnly := map[string]*config.ResourceClass{"small": small}
	smallAndLarge := map[string]*config.ResourceClass{"small": small, "large": large}

	// Tag sets used by the cases: one without and one with a resource_class tag.
	untagged := map[string]string{"queue": "test"}
	tagged := func(class string) map[string]string {
		return map[string]string{"queue": "test", "resource_class": class}
	}

	env := []corev1.EnvVar{
		{Name: "BUILDKITE_BOOTSTRAP_PHASES", Value: "plugin,command"},
		{Name: "BUILDKITE_COMMAND", Value: "echo hello"},
	}

	type testCase struct {
		name                     string
		resourceClasses          map[string]*config.ResourceClass
		defaultResourceClassName string
		tags                     map[string]string
		wantErr                  string
		wantResources            *corev1.ResourceRequirements
	}

	// Fields left out of a case take their zero value: no default name, no
	// expected error, no expected resources.
	cases := []testCase{
		{
			name:            "no tag and no default - no resources applied",
			resourceClasses: smallOnly,
			tags:            untagged,
		},
		{
			name:            "tag specified - applies tagged resource class",
			resourceClasses: smallAndLarge,
			tags:            tagged("large"),
			wantResources:   large.Resource,
		},
		{
			name:                     "no tag but default set - applies default resource class",
			resourceClasses:          smallAndLarge,
			defaultResourceClassName: "small",
			tags:                     untagged,
			wantResources:            small.Resource,
		},
		{
			name:                     "tag overrides default - applies tagged resource class",
			resourceClasses:          smallAndLarge,
			defaultResourceClassName: "small",
			tags:                     tagged("large"),
			wantResources:            large.Resource,
		},
		{
			name:    "tag specified but resource classes not configured",
			tags:    tagged("small"),
			wantErr: "resource classes not configured but resource_class tag specified",
		},
		{
			name:                     "default set but resource classes not configured",
			defaultResourceClassName: "small",
			tags:                     untagged,
			wantErr:                  "resource classes not configured but default-resource-class-name is set",
		},
		{
			name:            "tag references non-existent resource class",
			resourceClasses: smallOnly,
			tags:            tagged("nonexistent"),
			wantErr:         "resource class not found: nonexistent",
		},
		{
			name:                     "default references non-existent resource class",
			resourceClasses:          smallOnly,
			defaultResourceClassName: "nonexistent",
			tags:                     untagged,
			wantErr:                  "default resource class not found: nonexistent",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			w := &worker{
				cfg: Config{
					ResourceClasses:          tc.resourceClasses,
					DefaultResourceClassName: tc.defaultResourceClassName,
				},
				logger: logger,
			}

			podSpec := &corev1.PodSpec{
				Containers: []corev1.Container{
					{Name: "container-0", Env: env},
				},
			}

			err := w.applyResourceClass(podSpec, tc.tags)
			if tc.wantErr != "" {
				require.Error(t, err)
				assert.Contains(t, err.Error(), tc.wantErr)
				return
			}
			require.NoError(t, err)

			got := podSpec.Containers[0].Resources
			if tc.wantResources == nil {
				assert.Empty(t, got.Requests)
				assert.Empty(t, got.Limits)
				return
			}
			assert.Equal(t, tc.wantResources.Requests, got.Requests)
			assert.Equal(t, tc.wantResources.Limits, got.Limits)
		})
	}
}
1 change: 1 addition & 0 deletions internal/controller/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type Config struct {
ImageCheckContainerCPULimit string
ImageCheckContainerMemoryLimit string
ResourceClasses map[string]*config.ResourceClass
DefaultResourceClassName string
}

func New(logger *slog.Logger, client kubernetes.Interface, agentClient *api.AgentClient, cfg Config) *worker {
Expand Down
Loading