From dead8c7786cf31535db509fb6680bd187ef34380 Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Mon, 24 Mar 2025 23:29:52 +0000 Subject: [PATCH 1/2] add al2023 nvidia arm ami types --- pkg/ami/ssm_resolver.go | 2 + pkg/ami/ssm_resolver_test.go | 2 - pkg/apis/eksctl.io/v1alpha5/defaults.go | 5 +- .../eksctl.io/v1alpha5/gpu_validation_test.go | 7 +-- pkg/apis/eksctl.io/v1alpha5/validation.go | 5 +- pkg/cfn/builder/managed_nodegroup.go | 63 +++++++++++-------- pkg/utils/instance/instance.go | 6 -- 7 files changed, 46 insertions(+), 44 deletions(-) diff --git a/pkg/ami/ssm_resolver.go b/pkg/ami/ssm_resolver.go index a4c6794238..be4da1e8ce 100644 --- a/pkg/ami/ssm_resolver.go +++ b/pkg/ami/ssm_resolver.go @@ -115,6 +115,8 @@ func MakeManagedSSMParameterName(version string, amiType ekstypes.AMITypes) stri return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/neuron/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023)) case ekstypes.AMITypesAl2023Arm64Standard: return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/arm64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023)) + case ekstypes.AMITypesAl2023Arm64Nvidia: + return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/arm64/nvidia/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023)) case ekstypes.AMITypesAl2X8664: return makeAL2ParameterName("") case ekstypes.AMITypesAl2X8664Gpu: diff --git a/pkg/ami/ssm_resolver_test.go b/pkg/ami/ssm_resolver_test.go index 7ab4efb000..c2bbd1d95b 100644 --- a/pkg/ami/ssm_resolver_test.go +++ b/pkg/ami/ssm_resolver_test.go @@ -757,8 +757,6 @@ var _ = Describe("AMI Auto Resolution", func() { var eksAMIType ekstypes.AMITypes for _, amiType := range eksAMIType.Values() { if amiType == ekstypes.AMITypesCustom || strings.HasPrefix(string(amiType), "WINDOWS_") || - // TODO: remove this condition after adding support for AL2023 Nvidia and Neuron AMI 
types. - amiType == ekstypes.AMITypesAl2023X8664Nvidia || amiType == ekstypes.AMITypesAl2023X8664Neuron || // TODO: remove this condition after support for Bottlerocket FIPS AMI types. amiType == ekstypes.AMITypesBottlerocketArm64Fips || amiType == ekstypes.AMITypesBottlerocketX8664Fips { continue diff --git a/pkg/apis/eksctl.io/v1alpha5/defaults.go b/pkg/apis/eksctl.io/v1alpha5/defaults.go index 5406f5da5b..cc0326fdea 100644 --- a/pkg/apis/eksctl.io/v1alpha5/defaults.go +++ b/pkg/apis/eksctl.io/v1alpha5/defaults.go @@ -11,7 +11,6 @@ import ( ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types" "github.com/weaveworks/eksctl/pkg/utils" - instanceutils "github.com/weaveworks/eksctl/pkg/utils/instance" ) const ( @@ -149,8 +148,8 @@ func SetManagedNodeGroupDefaults(ng *ManagedNodeGroup, meta *ClusterMeta, contro // When using custom AMIs, we want the user to explicitly specify AMI family. // Thus, we only set up default AMI family when no custom AMI is being used. if ng.AMIFamily == "" && ng.AMI == "" { - if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer && - !instanceutils.IsARMGPUInstanceType(ng.InstanceType) { + // AL2023 is the default ami type on EKS managed nodegroups after 1.30. 
+ if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer { ng.AMIFamily = NodeImageFamilyAmazonLinux2023 } else { ng.AMIFamily = NodeImageFamilyAmazonLinux2 diff --git a/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go b/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go index b5e11eb18d..51e2c806e6 100644 --- a/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go +++ b/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go @@ -244,16 +244,11 @@ var _ = Describe("GPU instance support", func() { amiFamily := api.NodeImageFamilyAmazonLinux2023 instanceType := "g5g.2xlarge" - ngFail := api.NewNodeGroup() - ngFail.AMIFamily = amiFamily - ngFail.InstanceType = instanceType - ngPass := api.NewNodeGroup() ngPass.AMIFamily = amiFamily ngPass.InstanceType = instanceType ngPass.AMI = "ami-xxxx" - Expect(api.ValidateNodeGroup(0, ngFail, api.NewClusterConfig())).To(HaveOccurred()) Expect(api.ValidateNodeGroup(0, ngPass, api.NewClusterConfig())).NotTo(HaveOccurred()) }) @@ -269,7 +264,7 @@ var _ = Describe("GPU instance support", func() { } }, Entry("AmazonLinux2", api.NodeImageFamilyAmazonLinux2, true), - Entry("AmazonLinux2023", api.NodeImageFamilyAmazonLinux2023, true), + Entry("AmazonLinux2023", api.NodeImageFamilyAmazonLinux2023, false), Entry("Ubuntu2004", api.NodeImageFamilyUbuntu2004, true), Entry("Windows2019Full", api.NodeImageFamilyWindowsServer2019FullContainer, true), Entry("Windows2019Core", api.NodeImageFamilyWindowsServer2019CoreContainer, true), diff --git a/pkg/apis/eksctl.io/v1alpha5/validation.go b/pkg/apis/eksctl.io/v1alpha5/validation.go index 96e377d64b..787c889c02 100644 --- a/pkg/apis/eksctl.io/v1alpha5/validation.go +++ b/pkg/apis/eksctl.io/v1alpha5/validation.go @@ -1079,9 +1079,10 @@ func validateInstanceTypeSupport(ng *NodeGroup) error { if IsAMI(ng.AMI) { return nil } - if instanceutils.IsARMGPUInstanceType(SelectInstanceType(ng)) { + instanceType := SelectInstanceType(ng) + if instanceutils.IsARMInstanceType(instanceType) && 
instanceutils.IsNvidiaInstanceType(instanceType) { switch ng.AMIFamily { - case NodeImageFamilyBottlerocket: + case NodeImageFamilyBottlerocket, NodeImageFamilyAmazonLinux2023: default: return fmt.Errorf("ARM GPU instance types are not supported for unmanaged nodegroups with AMIFamily %s", ng.AMIFamily) } diff --git a/pkg/cfn/builder/managed_nodegroup.go b/pkg/cfn/builder/managed_nodegroup.go index f4d5f20869..1e1fbac9ab 100644 --- a/pkg/cfn/builder/managed_nodegroup.go +++ b/pkg/cfn/builder/managed_nodegroup.go @@ -307,17 +307,19 @@ func validateLaunchTemplate(launchTemplateData *ec2types.ResponseLaunchTemplateD func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes { amiTypeMapping := map[string]struct { - X86x64 ekstypes.AMITypes - X86Nvidia ekstypes.AMITypes - X86Neuron ekstypes.AMITypes - ARM ekstypes.AMITypes - ARMGPU ekstypes.AMITypes + X86x64 ekstypes.AMITypes + X86Nvidia ekstypes.AMITypes + X86Neuron ekstypes.AMITypes + ARM ekstypes.AMITypes + ARM64Nvidia ekstypes.AMITypes + ARM64Neuron ekstypes.AMITypes }{ api.NodeImageFamilyAmazonLinux2023: { - X86x64: ekstypes.AMITypesAl2023X8664Standard, - X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia, - X86Neuron: ekstypes.AMITypesAl2023X8664Neuron, - ARM: ekstypes.AMITypesAl2023Arm64Standard, + X86x64: ekstypes.AMITypesAl2023X8664Standard, + X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia, + X86Neuron: ekstypes.AMITypesAl2023X8664Neuron, + ARM: ekstypes.AMITypesAl2023Arm64Standard, + ARM64Nvidia: ekstypes.AMITypesAl2023Arm64Nvidia, }, api.NodeImageFamilyAmazonLinux2: { X86x64: ekstypes.AMITypesAl2X8664, @@ -326,11 +328,10 @@ func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes ARM: ekstypes.AMITypesAl2Arm64, }, api.NodeImageFamilyBottlerocket: { - X86x64: ekstypes.AMITypesBottlerocketX8664, - X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia, - X86Neuron: ekstypes.AMITypesBottlerocketX8664, - ARM: ekstypes.AMITypesBottlerocketArm64, - ARMGPU: 
ekstypes.AMITypesBottlerocketArm64Nvidia, + X86x64: ekstypes.AMITypesBottlerocketX8664, + X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia, + ARM: ekstypes.AMITypesBottlerocketArm64, + ARM64Nvidia: ekstypes.AMITypesBottlerocketArm64Nvidia, }, api.NodeImageFamilyWindowsServer2019FullContainer: { X86x64: ekstypes.AMITypesWindowsFull2019X8664, @@ -355,17 +356,29 @@ func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes return ekstypes.AMITypesCustom } - switch { - case instanceutils.IsARMGPUInstanceType(instanceType): - return amiType.ARMGPU - case instanceutils.IsARMInstanceType(instanceType): - return amiType.ARM - case instanceutils.IsNvidiaInstanceType(instanceType): - return amiType.X86Nvidia - case instanceutils.IsNeuronInstanceType(instanceType): - return amiType.X86Neuron - default: - return amiType.X86x64 + // NOTE: currently this logic is designed to return the most appropriate + // amiType given it will run on the instance. this means the architecture + // must match, but is flexible when deciding the optimal amiType based on + // specific accelerators. In the case that an instance has no specialized + // amiType matching its accelerator, it will fall back to general variant. 
+ if instanceutils.IsARMInstanceType(instanceType) { + switch { + case instanceutils.IsNvidiaInstanceType(instanceType) && amiType.ARM64Nvidia != "": + return amiType.ARM64Nvidia + case instanceutils.IsNeuronInstanceType(instanceType) && amiType.ARM64Neuron != "": + return amiType.ARM64Neuron + default: + return amiType.ARM + } + } else { + switch { + case instanceutils.IsNvidiaInstanceType(instanceType) && amiType.X86Nvidia != "": + return amiType.X86Nvidia + case instanceutils.IsNeuronInstanceType(instanceType) && amiType.X86Neuron != "": + return amiType.X86Neuron + default: + return amiType.X86x64 + } } } diff --git a/pkg/utils/instance/instance.go b/pkg/utils/instance/instance.go index b5cd1d568b..c63e30d36c 100644 --- a/pkg/utils/instance/instance.go +++ b/pkg/utils/instance/instance.go @@ -21,12 +21,6 @@ func IsNeuronInstanceType(instanceType string) bool { return InstanceTypesMap[instanceType].NeuronSupported } -// IsARMGPUInstanceType returns true if the instance type is ARM-GPU architecture -func IsARMGPUInstanceType(instanceType string) bool { - itype := InstanceTypesMap[instanceType] - return itype.CPUArch == "arm64" && itype.NvidiaGPUSupported -} - // IsNvidiaInstanceType returns true if the instance type has NVIDIA accelerated hardware func IsNvidiaInstanceType(instanceType string) bool { return InstanceTypesMap[instanceType].NvidiaGPUSupported From 1653cbba6a645e9909553757ce2556c2e7e05886 Mon Sep 17 00:00:00 2001 From: Nick Baker Date: Thu, 27 Mar 2025 21:50:06 +0000 Subject: [PATCH 2/2] ergonomic changes for validations --- go.mod | 8 +- go.sum | 16 ++-- pkg/apis/eksctl.io/v1alpha5/amitype.go | 91 +++++++++++++++++++ .../eksctl.io/v1alpha5/gpu_validation_test.go | 13 ++- pkg/apis/eksctl.io/v1alpha5/validation.go | 9 +- .../eksctl.io/v1alpha5/validation_test.go | 4 +- pkg/cfn/builder/managed_nodegroup.go | 81 +---------------- 7 files changed, 120 insertions(+), 102 deletions(-) create mode 100644 pkg/apis/eksctl.io/v1alpha5/amitype.go diff --git 
a/go.mod b/go.mod index 40a0f09bd6..37f7f605d4 100644 --- a/go.mod +++ b/go.mod @@ -11,13 +11,13 @@ require ( github.com/aws/aws-sdk-go-v2 v1.36.3 github.com/aws/aws-sdk-go-v2/config v1.29.12 github.com/aws/aws-sdk-go-v2/credentials v1.17.65 - github.com/aws/aws-sdk-go-v2/service/autoscaling v1.52.2 - github.com/aws/aws-sdk-go-v2/service/cloudformation v1.59.1 + github.com/aws/aws-sdk-go-v2/service/autoscaling v1.52.3 + github.com/aws/aws-sdk-go-v2/service/cloudformation v1.59.2 github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.48.3 - github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.2 + github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.3 github.com/aws/aws-sdk-go-v2/service/cognitoidentityprovider v1.51.3 github.com/aws/aws-sdk-go-v2/service/ec2 v1.210.1 - github.com/aws/aws-sdk-go-v2/service/eks v1.63.1 + github.com/aws/aws-sdk-go-v2/service/eks v1.64.0 github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing v1.29.2 github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.45.1 github.com/aws/aws-sdk-go-v2/service/iam v1.41.1 diff --git a/go.sum b/go.sum index f98287d624..ec843e8b26 100644 --- a/go.sum +++ b/go.sum @@ -124,20 +124,20 @@ github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.33 h1:/frG8aV09yhCVSOEC2pzktflJJO48NwY3xntHBwxHiA= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.33/go.mod h1:8vwASlAcV366M+qxZnjNzCjeastk1Rt1bpSRaGZanGU= -github.com/aws/aws-sdk-go-v2/service/autoscaling v1.52.2 h1:OA5uEC/SrjRLhNGHgF/iS6YQz1bjlrCje9sERyLlGro= -github.com/aws/aws-sdk-go-v2/service/autoscaling v1.52.2/go.mod h1:CDqMoc3KRdZJ8qziW96J35lKH01Wq3B2aihtHj2JbRs= -github.com/aws/aws-sdk-go-v2/service/cloudformation v1.59.1 h1:VaXjN6szl50hbLMfSOKBKl3bEOb805aHe8j1yv0fKhU= -github.com/aws/aws-sdk-go-v2/service/cloudformation v1.59.1/go.mod 
h1:penaZKzGmqHGZId4EUCBIW/f9l4Y7hQ5NKd45yoCYuI= +github.com/aws/aws-sdk-go-v2/service/autoscaling v1.52.3 h1:QsKdBxtC8csnKt5BbV7D1op4Nf13p2YkTJIkppaCakw= +github.com/aws/aws-sdk-go-v2/service/autoscaling v1.52.3/go.mod h1:CDqMoc3KRdZJ8qziW96J35lKH01Wq3B2aihtHj2JbRs= +github.com/aws/aws-sdk-go-v2/service/cloudformation v1.59.2 h1:o9cuZdZlI9VWMqsNa2mnf2IRsFAROHnaYA1BW3lHGuY= +github.com/aws/aws-sdk-go-v2/service/cloudformation v1.59.2/go.mod h1:penaZKzGmqHGZId4EUCBIW/f9l4Y7hQ5NKd45yoCYuI= github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.48.3 h1:nTKHvvDTsS6SqAqu/fDhpmbNmDz+0ONh8niPoCkhPtM= github.com/aws/aws-sdk-go-v2/service/cloudtrail v1.48.3/go.mod h1:/BibEr5ksr34abqBTQN213GrNG6GCKCB6WG7CH4zH2w= -github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.2 h1:caIDFGKezQQA/kali05x3NF2DVwzjtOFjvNvFnEeCm4= -github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.2/go.mod h1:uo14VBn5cNk/BPGTPz3kyLBxgpgOObgO8lmz+H7Z4Ck= +github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.3 h1:3y0jkGtsaZLCg+n73BoSXOAkLFtgmD/+4prXW1pzovc= +github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.3/go.mod h1:uo14VBn5cNk/BPGTPz3kyLBxgpgOObgO8lmz+H7Z4Ck= github.com/aws/aws-sdk-go-v2/service/cognitoidentityprovider v1.51.3 h1:4U9dpQZTvJ0Mi1qn8L1hRJ4igFCQYEjwUuOmYkWM5tE= github.com/aws/aws-sdk-go-v2/service/cognitoidentityprovider v1.51.3/go.mod h1:ygltZT++6Wn2uG4+tqE0NW1MkdEtb5W2O/CFc0xJX/g= github.com/aws/aws-sdk-go-v2/service/ec2 v1.210.1 h1:+4A9SDduLZFlDeXWRmfQ6r8kyEJZQfK6lcg+KwdvWrI= github.com/aws/aws-sdk-go-v2/service/ec2 v1.210.1/go.mod h1:ouvGEfHbLaIlWwpDpOVWPWR+YwO0HDv3vm5tYLq8ImY= -github.com/aws/aws-sdk-go-v2/service/eks v1.63.1 h1:oI4AHf3K7cA+ukczcNwYsE8A7trMQiTRZTsgfkSS9BE= -github.com/aws/aws-sdk-go-v2/service/eks v1.63.1/go.mod h1:v1xXy6ea0PHtWkjFUvAUh6B/5wv7UF909Nru0dOIJDk= +github.com/aws/aws-sdk-go-v2/service/eks v1.64.0 h1:EYeOThTRysemFtC6J6h6b7dNg3jN03QuO5cg92ojIQE= +github.com/aws/aws-sdk-go-v2/service/eks v1.64.0/go.mod 
h1:v1xXy6ea0PHtWkjFUvAUh6B/5wv7UF909Nru0dOIJDk= github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing v1.29.2 h1:Zlfmpg4QsduBeiK0vTc8WjnHZoYVGe64FcwuCsipjWE= github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing v1.29.2/go.mod h1:H232HdqVlSUoqy0cMJYW1TKjcxvGFGFZ20xQG8fOAPw= github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.45.1 h1:USXR7nfl+bu7HnR/M3KtnPD3wjlCXM72kYX+2PaIgEI= diff --git a/pkg/apis/eksctl.io/v1alpha5/amitype.go b/pkg/apis/eksctl.io/v1alpha5/amitype.go new file mode 100644 index 0000000000..cf01a771f7 --- /dev/null +++ b/pkg/apis/eksctl.io/v1alpha5/amitype.go @@ -0,0 +1,91 @@ +package v1alpha5 + +import ( + ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types" + instanceutils "github.com/weaveworks/eksctl/pkg/utils/instance" +) + +// GetAMIType returns the most appropriate amiType for the amiFamily and +// instanceType provided. Parameter `strict` controls whether or not fallbacks +// should be applied when searching for specialized amiTypes (e.g. accelerated +// instance types). If `strict` is false a fallback may be applied, otherwise a +// valid value is not guaranteed to be returned (empty string). 
+func GetAMIType(amiFamily, instanceType string, strict bool) ekstypes.AMITypes { + amiTypeMapping := map[string]struct { + X86x64 ekstypes.AMITypes + X86Nvidia ekstypes.AMITypes + X86Neuron ekstypes.AMITypes + ARM ekstypes.AMITypes + ARM64Nvidia ekstypes.AMITypes + ARM64Neuron ekstypes.AMITypes + }{ + NodeImageFamilyAmazonLinux2023: { + X86x64: ekstypes.AMITypesAl2023X8664Standard, + X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia, + X86Neuron: ekstypes.AMITypesAl2023X8664Neuron, + ARM: ekstypes.AMITypesAl2023Arm64Standard, + ARM64Nvidia: ekstypes.AMITypesAl2023Arm64Nvidia, + }, + NodeImageFamilyAmazonLinux2: { + X86x64: ekstypes.AMITypesAl2X8664, + X86Nvidia: ekstypes.AMITypesAl2X8664Gpu, + X86Neuron: ekstypes.AMITypesAl2X8664Gpu, + ARM: ekstypes.AMITypesAl2Arm64, + }, + NodeImageFamilyBottlerocket: { + X86x64: ekstypes.AMITypesBottlerocketX8664, + X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia, + X86Neuron: ekstypes.AMITypesBottlerocketX8664, + ARM: ekstypes.AMITypesBottlerocketArm64, + ARM64Nvidia: ekstypes.AMITypesBottlerocketArm64Nvidia, + ARM64Neuron: ekstypes.AMITypesBottlerocketArm64, + }, + NodeImageFamilyWindowsServer2019FullContainer: { + X86x64: ekstypes.AMITypesWindowsFull2019X8664, + X86Nvidia: ekstypes.AMITypesWindowsFull2019X8664, + }, + NodeImageFamilyWindowsServer2019CoreContainer: { + X86x64: ekstypes.AMITypesWindowsCore2019X8664, + X86Nvidia: ekstypes.AMITypesWindowsCore2019X8664, + }, + NodeImageFamilyWindowsServer2022FullContainer: { + X86x64: ekstypes.AMITypesWindowsFull2022X8664, + X86Nvidia: ekstypes.AMITypesWindowsFull2022X8664, + }, + NodeImageFamilyWindowsServer2022CoreContainer: { + X86x64: ekstypes.AMITypesWindowsCore2022X8664, + X86Nvidia: ekstypes.AMITypesWindowsCore2022X8664, + }, + } + + amiType, ok := amiTypeMapping[amiFamily] + if !ok { + return ekstypes.AMITypesCustom + } + + // this helper short circuits the check for missing entries for amiTypes in + // ami families based on the value of `strict`. 
+ isValid := func(amiType ekstypes.AMITypes) bool { + return strict || amiType != "" + } + + if instanceutils.IsARMInstanceType(instanceType) { + switch { + case instanceutils.IsNvidiaInstanceType(instanceType) && isValid(amiType.ARM64Nvidia): + return amiType.ARM64Nvidia + case instanceutils.IsNeuronInstanceType(instanceType) && isValid(amiType.ARM64Neuron): + return amiType.ARM64Neuron + default: + return amiType.ARM + } + } else { + switch { + case instanceutils.IsNvidiaInstanceType(instanceType) && isValid(amiType.X86Nvidia): + return amiType.X86Nvidia + case instanceutils.IsNeuronInstanceType(instanceType) && isValid(amiType.X86Neuron): + return amiType.X86Neuron + default: + return amiType.X86x64 + } + } +} diff --git a/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go b/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go index 51e2c806e6..6efde9d02e 100644 --- a/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go +++ b/pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go @@ -51,6 +51,10 @@ var _ = Describe("GPU instance support", func() { amiFamily: api.NodeImageFamilyAmazonLinux2023, gpuInstanceType: "g4dn.xlarge", }), + Entry("AL2023 ARM NVIDIA", gpuInstanceEntry{ + amiFamily: api.NodeImageFamilyAmazonLinux2023, + gpuInstanceType: "g5g.2xlarge", + }), Entry("AL2", gpuInstanceEntry{ gpuInstanceType: "asdf", amiFamily: api.NodeImageFamilyAmazonLinux2, @@ -96,7 +100,6 @@ var _ = Describe("GPU instance support", func() { ng.InstanceType = e.gpuInstanceType ng.AMIFamily = e.amiFamily assertValidationError(e, api.ValidateNodeGroup(0, ng, api.NewClusterConfig())) - }, Entry("AL2023 INF", gpuInstanceEntry{ amiFamily: api.NodeImageFamilyAmazonLinux2023, @@ -110,6 +113,10 @@ var _ = Describe("GPU instance support", func() { amiFamily: api.NodeImageFamilyAmazonLinux2023, gpuInstanceType: "g4dn.xlarge", }), + Entry("AL2023 ARM NVIDIA", gpuInstanceEntry{ + amiFamily: api.NodeImageFamilyAmazonLinux2023, + gpuInstanceType: "g5g.2xlarge", + }), Entry("AL2", gpuInstanceEntry{ 
gpuInstanceType: "g4dn.xlarge", amiFamily: api.NodeImageFamilyAmazonLinux2, @@ -258,14 +265,14 @@ var _ = Describe("GPU instance support", func() { ng.AMIFamily = amiFamily err := api.ValidateNodeGroup(0, ng, api.NewClusterConfig()) if expectErr { - Expect(err).To(MatchError(fmt.Sprintf("ARM GPU instance types are not supported for unmanaged nodegroups with AMIFamily %s", amiFamily))) + Expect(err).To(MatchError(fmt.Sprintf("%s instance types are not supported for unmanaged nodegroups with AMIFamily %s", ng.InstanceType, amiFamily))) } else { Expect(err).NotTo(HaveOccurred()) } }, Entry("AmazonLinux2", api.NodeImageFamilyAmazonLinux2, true), Entry("AmazonLinux2023", api.NodeImageFamilyAmazonLinux2023, false), - Entry("Ubuntu2004", api.NodeImageFamilyUbuntu2004, true), + Entry("Ubuntu2004", api.NodeImageFamilyUbuntu2004, false), Entry("Windows2019Full", api.NodeImageFamilyWindowsServer2019FullContainer, true), Entry("Windows2019Core", api.NodeImageFamilyWindowsServer2019CoreContainer, true), Entry("Bottlerocket", api.NodeImageFamilyBottlerocket, false), diff --git a/pkg/apis/eksctl.io/v1alpha5/validation.go b/pkg/apis/eksctl.io/v1alpha5/validation.go index 787c889c02..99693c7b2b 100644 --- a/pkg/apis/eksctl.io/v1alpha5/validation.go +++ b/pkg/apis/eksctl.io/v1alpha5/validation.go @@ -1080,12 +1080,9 @@ func validateInstanceTypeSupport(ng *NodeGroup) error { return nil } instanceType := SelectInstanceType(ng) - if instanceutils.IsARMInstanceType(instanceType) && instanceutils.IsNvidiaInstanceType(instanceType) { - switch ng.AMIFamily { - case NodeImageFamilyBottlerocket, NodeImageFamilyAmazonLinux2023: - default: - return fmt.Errorf("ARM GPU instance types are not supported for unmanaged nodegroups with AMIFamily %s", ng.AMIFamily) - } + amiType := GetAMIType(ng.AMIFamily, instanceType, true /* strict, don't allow fallbacks */) + if amiType == "" { + return fmt.Errorf("%s instance types are not supported for unmanaged nodegroups with AMIFamily %s", instanceType, 
ng.AMIFamily) } return nil } diff --git a/pkg/apis/eksctl.io/v1alpha5/validation_test.go b/pkg/apis/eksctl.io/v1alpha5/validation_test.go index 85fb7900ce..6f5cc763e2 100644 --- a/pkg/apis/eksctl.io/v1alpha5/validation_test.go +++ b/pkg/apis/eksctl.io/v1alpha5/validation_test.go @@ -1796,7 +1796,7 @@ var _ = Describe("ClusterConfig validation", func() { ng.InstancesDistribution.InstanceTypes = []string{"g5g.2xlarge"} ng.AMIFamily = api.NodeImageFamilyAmazonLinux2 err := api.ValidateNodeGroup(0, ng, cfg) - Expect(err).To(MatchError("ARM GPU instance types are not supported for unmanaged nodegroups with AMIFamily AmazonLinux2")) + Expect(err).To(MatchError("g5g.2xlarge instance types are not supported for unmanaged nodegroups with AMIFamily AmazonLinux2")) }) It("ARM-based GPU instance type fails for AmazonLinux2", func() { @@ -1804,7 +1804,7 @@ var _ = Describe("ClusterConfig validation", func() { ng.InstancesDistribution.InstanceTypes = nil ng.AMIFamily = api.NodeImageFamilyAmazonLinux2 err := api.ValidateNodeGroup(0, ng, cfg) - Expect(err).To(MatchError("ARM GPU instance types are not supported for unmanaged nodegroups with AMIFamily AmazonLinux2")) + Expect(err).To(MatchError("g5g.2xlarge instance types are not supported for unmanaged nodegroups with AMIFamily AmazonLinux2")) }) It("fails when instance distribution is enabled and instanceType set", func() { diff --git a/pkg/cfn/builder/managed_nodegroup.go b/pkg/cfn/builder/managed_nodegroup.go index 1e1fbac9ab..a469c433fd 100644 --- a/pkg/cfn/builder/managed_nodegroup.go +++ b/pkg/cfn/builder/managed_nodegroup.go @@ -153,7 +153,7 @@ func (m *ManagedNodeGroupResourceSet) AddAllResources(ctx context.Context) error instanceTypes := m.nodeGroup.InstanceTypeList() makeAMIType := func() *gfnt.Value { - return gfnt.NewString(string(getAMIType(m.nodeGroup, selectManagedInstanceType(m.nodeGroup)))) + return gfnt.NewString(string(api.GetAMIType(m.nodeGroup.AMIFamily, selectManagedInstanceType(m.nodeGroup), false /* not 
strict, allow fallback */))) } var launchTemplate *gfneks.Nodegroup_LaunchTemplateSpecification @@ -178,7 +178,7 @@ func (m *ManagedNodeGroupResourceSet) AddAllResources(ctx context.Context) error if launchTemplateData.InstanceType == "" { managedResource.AmiType = makeAMIType() } else { - managedResource.AmiType = gfnt.NewString(string(getAMIType(m.nodeGroup, string(launchTemplateData.InstanceType)))) + managedResource.AmiType = gfnt.NewString(string(api.GetAMIType(m.nodeGroup.AMIFamily, string(launchTemplateData.InstanceType), false /* not strict, allow fallback */))) } } @@ -305,83 +305,6 @@ func validateLaunchTemplate(launchTemplateData *ec2types.ResponseLaunchTemplateD return nil } -func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes { - amiTypeMapping := map[string]struct { - X86x64 ekstypes.AMITypes - X86Nvidia ekstypes.AMITypes - X86Neuron ekstypes.AMITypes - ARM ekstypes.AMITypes - ARM64Nvidia ekstypes.AMITypes - ARM64Neuron ekstypes.AMITypes - }{ - api.NodeImageFamilyAmazonLinux2023: { - X86x64: ekstypes.AMITypesAl2023X8664Standard, - X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia, - X86Neuron: ekstypes.AMITypesAl2023X8664Neuron, - ARM: ekstypes.AMITypesAl2023Arm64Standard, - ARM64Nvidia: ekstypes.AMITypesAl2023Arm64Nvidia, - }, - api.NodeImageFamilyAmazonLinux2: { - X86x64: ekstypes.AMITypesAl2X8664, - X86Nvidia: ekstypes.AMITypesAl2X8664Gpu, - X86Neuron: ekstypes.AMITypesAl2X8664Gpu, - ARM: ekstypes.AMITypesAl2Arm64, - }, - api.NodeImageFamilyBottlerocket: { - X86x64: ekstypes.AMITypesBottlerocketX8664, - X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia, - ARM: ekstypes.AMITypesBottlerocketArm64, - ARM64Nvidia: ekstypes.AMITypesBottlerocketArm64Nvidia, - }, - api.NodeImageFamilyWindowsServer2019FullContainer: { - X86x64: ekstypes.AMITypesWindowsFull2019X8664, - X86Nvidia: ekstypes.AMITypesWindowsFull2019X8664, - }, - api.NodeImageFamilyWindowsServer2019CoreContainer: { - X86x64: ekstypes.AMITypesWindowsCore2019X8664, - 
X86Nvidia: ekstypes.AMITypesWindowsCore2019X8664, - }, - api.NodeImageFamilyWindowsServer2022FullContainer: { - X86x64: ekstypes.AMITypesWindowsFull2022X8664, - X86Nvidia: ekstypes.AMITypesWindowsFull2022X8664, - }, - api.NodeImageFamilyWindowsServer2022CoreContainer: { - X86x64: ekstypes.AMITypesWindowsCore2022X8664, - X86Nvidia: ekstypes.AMITypesWindowsCore2022X8664, - }, - } - - amiType, ok := amiTypeMapping[ng.AMIFamily] - if !ok { - return ekstypes.AMITypesCustom - } - - // NOTE: currently this logic is designed to return the most appropriate - // amiType given it will run on the instance. this means the architecture - // must match, but is flexible when deciding the optimal amiType based on - // specific accelerators. In the case that an instance has no specialized - // amiType matching its accelerator, it will fall back to general variant. - if instanceutils.IsARMInstanceType(instanceType) { - switch { - case instanceutils.IsNvidiaInstanceType(instanceType) && amiType.ARM64Nvidia != "": - return amiType.ARM64Nvidia - case instanceutils.IsNeuronInstanceType(instanceType) && amiType.ARM64Neuron != "": - return amiType.ARM64Neuron - default: - return amiType.ARM - } - } else { - switch { - case instanceutils.IsNvidiaInstanceType(instanceType) && amiType.X86Nvidia != "": - return amiType.X86Nvidia - case instanceutils.IsNeuronInstanceType(instanceType) && amiType.X86Neuron != "": - return amiType.X86Neuron - default: - return amiType.X86x64 - } - } -} - // RenderJSON implements the ResourceSet interface func (m *ManagedNodeGroupResourceSet) RenderJSON() ([]byte, error) { return m.resourceSet.renderJSON()