diff --git a/pkg/apis/eksctl.io/v1alpha5/beta.go b/pkg/apis/eksctl.io/v1alpha5/beta.go new file mode 100644 index 0000000000..82645aabc2 --- /dev/null +++ b/pkg/apis/eksctl.io/v1alpha5/beta.go @@ -0,0 +1,14 @@ +package v1alpha5 + +import ( + "os" + "strings" +) + +func (c *ClusterConfig) IsCustomEksEndpoint() bool { + eksEndpoint := os.Getenv("AWS_EKS_ENDPOINT") + if eksEndpoint == "" { + eksEndpoint = os.Getenv("AWS_ENDPOINT_URL_EKS") + } + return strings.Contains(eksEndpoint, "beta") || strings.Contains(eksEndpoint, "gamma") +} diff --git a/pkg/cfn/builder/beta.go b/pkg/cfn/builder/beta.go new file mode 100644 index 0000000000..e8169f5fa3 --- /dev/null +++ b/pkg/cfn/builder/beta.go @@ -0,0 +1,253 @@ +package builder + +import ( + "context" + _ "embed" + "fmt" + "os" + "strings" + + "github.com/aws/aws-sdk-go-v2/service/sts" + + "github.com/weaveworks/eksctl/pkg/awsapi" + cft "github.com/weaveworks/eksctl/pkg/cfn/template" + "github.com/weaveworks/eksctl/pkg/goformation" + gfn "github.com/weaveworks/eksctl/pkg/goformation/cloudformation" + "github.com/weaveworks/eksctl/pkg/goformation/cloudformation/cloudformation" + gfneks "github.com/weaveworks/eksctl/pkg/goformation/cloudformation/eks" + "github.com/weaveworks/eksctl/pkg/goformation/cloudformation/lambda" + gfnt "github.com/weaveworks/eksctl/pkg/goformation/cloudformation/types" +) + +//go:embed templates/beta-resources.yaml +var betaResourcesTemplate []byte + +//go:embed templates/beta.py +var lambdaBetaPy []byte + +func addBetaResources(stsAPI awsapi.STS, stackName string, clusterTemplate *gfn.Template, g *gfneks.Cluster) error { + + identity, err := stsAPI.GetCallerIdentity(context.TODO(), &sts.GetCallerIdentityInput{}) + if err != nil { + return fmt.Errorf("unable to get identity: %w", err) + } + userArn := *identity.Arn + baseArn := userArn[:strings.LastIndex(userArn, "/")] + roleArn := fmt.Sprintf("%s%s", baseArn, "/{{SessionName}}") + iamARN := strings.Replace( + strings.Replace(baseArn, "assumed-role", "role", 1), + "sts", "iam", 1) + + clusterName := "eksctl-" + stackName + "-cluster" + + template, err := goformation.ParseYAML(betaResourcesTemplate) + if err != nil { + return err + } + for resourceName, resource := range template.Resources { + clusterTemplate.Resources[resourceName] = resource + } + for key, output := range template.Outputs { + clusterTemplate.Outputs[key] = output + } + customResource := clusterTemplate.Resources["ControlPlane"].(*gfn.CustomResource) + if g.AccessConfig != nil { + customResource.Properties["AccessConfig"] = g.AccessConfig + } + if g.BootstrapSelfManagedAddons != nil { + customResource.Properties["BootstrapSelfManagedAddons"] = g.BootstrapSelfManagedAddons + } + if g.ComputeConfig != nil { + customResource.Properties["ComputeConfig"] = g.ComputeConfig + } + if g.EncryptionConfig != nil { + customResource.Properties["EncryptionConfig"] = g.EncryptionConfig + } + if g.KubernetesNetworkConfig != nil { + customResource.Properties["KubernetesNetworkConfig"] = g.KubernetesNetworkConfig + } + if g.Logging != nil { + customResource.Properties["Logging"] = g.Logging + } + if g.Name != nil { + customResource.Properties["Name"] = g.Name + } + if g.OutpostConfig != nil { + customResource.Properties["OutpostConfig"] = g.OutpostConfig + } + if g.RemoteNetworkConfig != nil { + customResource.Properties["RemoteNetworkConfig"] = g.RemoteNetworkConfig + } + if g.ResourcesVpcConfig != nil { + customResource.Properties["ResourcesVpcConfig"] = g.ResourcesVpcConfig + } + if g.RoleArn != nil { + customResource.Properties["RoleArn"] = g.RoleArn + } + if g.StorageConfig != nil { + customResource.Properties["StorageConfig"] = g.StorageConfig + } + if g.Tags != nil { + g.Tags = append(g.Tags, cloudformation.Tag{ + Key: gfnt.NewString("Name"), + Value: gfnt.NewString(clusterName + "/ControlPlane"), + }) + customResource.Properties["Tags"] = g.Tags + } else { + customResource.Properties["Tags"] = []cloudformation.Tag{ + { + Key: gfnt.NewString("Name"), + Value: gfnt.NewString(clusterName + "/ControlPlane"), + }, + } + } + if g.UpgradePolicy != nil { + customResource.Properties["UpgradePolicy"] = g.UpgradePolicy + } + if g.Version != nil { + customResource.Properties["Version"] = g.Version + } + if g.ZonalShiftConfig != nil { + customResource.Properties["ZonalShiftConfig"] = g.ZonalShiftConfig + } + + customResource.Properties["IAMPrincipalArn"] = gfnt.NewString(iamARN) + customResource.Properties["STSRoleArn"] = gfnt.NewString(roleArn) + + customFunction := clusterTemplate.Resources["CustomEKSFunction"].(*lambda.Function) + customFunction.Code = &lambda.Function_Code{ + ZipFile: gfnt.NewString(string(lambdaBetaPy)), + } + + clusterTemplate.Outputs["EKSFunctionArn"] = gfn.Output{ + Value: gfnt.MakeFnGetAttString("CustomEKSFunction", "Arn"), + Export: &gfn.Export{ + Name: gfnt.MakeFnSubString(fmt.Sprintf("${%s}::EKSFunctionArn", gfnt.StackName)), + }, + } + + clusterTemplate.Parameters["EksEndpointUrl"] = gfn.Parameter{ + Type: "String", + Description: "The endpoint URL for the EKS service", + Default: gfnt.NewString(os.Getenv("AWS_ENDPOINT_URL_EKS")), + } + return nil +} + +func addBetaManagedNodeGroupResources(managedResource *gfneks.Nodegroup, stackName string) *gfn.CustomResource { + customResource := &gfn.CustomResource{ + Type: "Custom::EksManagedNodeGroup", + } + customResource.Properties = make(map[string]interface{}) + functionArn := gfnt.MakeFnImportValueString(fmt.Sprintf("eksctl-%s-cluster::EKSFunctionArn", stackName)) + customResource.Properties["ServiceToken"] = functionArn + + if managedResource.AmiType != nil { + customResource.Properties["AmiType"] = managedResource.AmiType + } + if managedResource.CapacityType != nil { + customResource.Properties["CapacityType"] = managedResource.CapacityType + } + if managedResource.ClusterName != nil { + customResource.Properties["ClusterName"] = managedResource.ClusterName + } + if managedResource.DiskSize != nil { + customResource.Properties["DiskSize"] = managedResource.DiskSize + } + if managedResource.ForceUpdateEnabled != nil { + customResource.Properties["ForceUpdateEnabled"] = managedResource.ForceUpdateEnabled + } + if managedResource.InstanceTypes != nil { + customResource.Properties["InstanceTypes"] = managedResource.InstanceTypes + } + if managedResource.Labels != nil { + customResource.Properties["Labels"] = managedResource.Labels + } + if managedResource.LaunchTemplate != nil { + customResource.Properties["LaunchTemplate"] = managedResource.LaunchTemplate + } + if managedResource.NodeRepairConfig != nil { + customResource.Properties["NodeRepairConfig"] = managedResource.NodeRepairConfig + } + if managedResource.NodeRole != nil { + customResource.Properties["NodeRole"] = managedResource.NodeRole + } + if managedResource.NodegroupName != nil { + customResource.Properties["NodegroupName"] = managedResource.NodegroupName + } + if managedResource.ReleaseVersion != nil { + customResource.Properties["ReleaseVersion"] = managedResource.ReleaseVersion + } + if managedResource.RemoteAccess != nil { + customResource.Properties["RemoteAccess"] = managedResource.RemoteAccess + } + if managedResource.ScalingConfig != nil { + customResource.Properties["ScalingConfig"] = managedResource.ScalingConfig + } + if managedResource.Subnets != nil { + customResource.Properties["Subnets"] = managedResource.Subnets + } + if managedResource.Tags != nil { + customResource.Properties["Tags"] = managedResource.Tags + } + if managedResource.Taints != nil { + customResource.Properties["Taints"] = managedResource.Taints + } + if managedResource.UpdateConfig != nil { + customResource.Properties["UpdateConfig"] = managedResource.UpdateConfig + } + if managedResource.Version != nil { + customResource.Properties["Version"] = managedResource.Version + } + + return customResource +} + +func createBetaAssumeRolePolicy() interface{} { + statements := []cft.MapOfInterfaces{ + { + "Effect": "Allow", + "Principal": cft.MapOfInterfaces{ + "Service": "eks.amazonaws.com", + }, + "Action": []string{ + "sts:AssumeRole", + "sts:TagSession", + }, + }, + { + "Effect": "Allow", + "Principal": cft.MapOfInterfaces{ + "Service": "eks-beta.aws.internal", + }, + "Action": []string{ + "sts:AssumeRole", + "sts:TagSession", + }, + }, + { + "Effect": "Allow", + "Principal": cft.MapOfInterfaces{ + "Service": "eks-gamma.aws.internal", + }, + "Action": []string{ + "sts:AssumeRole", + "sts:TagSession", + }, + }, + } + return cft.MakePolicyDocument(statements...) +} + +func addBetaAccessEntry(stackName string, accessEntryType string) *gfn.CustomResource { + customResource := &gfn.CustomResource{ + Type: "Custom::EksAccessEntry", + } + customResource.Properties = make(map[string]interface{}) + functionArn := gfnt.MakeFnImportValueString(fmt.Sprintf("eksctl-%s-cluster::EKSFunctionArn", stackName)) + customResource.Properties["ServiceToken"] = functionArn + customResource.Properties["PrincipalArn"] = gfnt.MakeFnGetAttString(cfnIAMInstanceRoleName, "Arn") + customResource.Properties["ClusterName"] = gfnt.NewString(stackName) + customResource.Properties["Type"] = gfnt.NewString(accessEntryType) + return customResource +} diff --git a/pkg/cfn/builder/cluster.go b/pkg/cfn/builder/cluster.go index 501b5be1f9..61917953a4 100644 --- a/pkg/cfn/builder/cluster.go +++ b/pkg/cfn/builder/cluster.go @@ -29,10 +29,11 @@ type ClusterResourceSet struct { region string vpcResourceSet VPCResourceSet securityGroups *gfnt.Value + stsAPI awsapi.STS } // NewClusterResourceSet returns a resource set for the new cluster. -func NewClusterResourceSet(ec2API awsapi.EC2, region string, spec *api.ClusterConfig, existingStack *gjson.Result, extendForOutposts bool) *ClusterResourceSet { +func NewClusterResourceSet(ec2API awsapi.EC2, stsAPI awsapi.STS, region string, spec *api.ClusterConfig, existingStack *gjson.Result, extendForOutposts bool) *ClusterResourceSet { var usesExistingVPC bool if existingStack != nil { unsetExistingResources(existingStack, spec) @@ -59,6 +60,7 @@ func NewClusterResourceSet(ec2API awsapi.EC2, region string, spec *api.ClusterCo rs: rs, spec: spec, ec2API: ec2API, + stsAPI: stsAPI, region: region, vpcResourceSet: vpcResourceSet, } @@ -409,7 +411,14 @@ func (c *ClusterResourceSet) addResourcesForControlPlane(subnetDetails *SubnetDe } } - c.newResource("ControlPlane", &cluster) + if c.spec.IsCustomEksEndpoint() { + err := addBetaResources(c.stsAPI, c.spec.Metadata.Name, c.rs.template, &cluster) + if err != nil { + return fmt.Errorf("unable to add beta resources: %w", err) + } + } else { + c.newResource("ControlPlane", &cluster) + } if c.spec.Status == nil { c.spec.Status = &api.ClusterStatus{} diff --git a/pkg/cfn/builder/cluster_test.go b/pkg/cfn/builder/cluster_test.go index 97a5833d31..8976ac40bc 100644 --- a/pkg/cfn/builder/cluster_test.go +++ b/pkg/cfn/builder/cluster_test.go @@ -45,7 +45,7 @@ var _ = Describe("Cluster Template Builder", func() { }) JustBeforeEach(func() { - crs = builder.NewClusterResourceSet(provider.EC2(), provider.Region(), cfg, existingStack, false) + crs = builder.NewClusterResourceSet(provider.EC2(), provider.STS(), provider.Region(), cfg, existingStack, false) }) Describe("AddAllResources", func() { diff --git a/pkg/cfn/builder/iam.go b/pkg/cfn/builder/iam.go index b1f2534194..b64a0d6d4d 100644 --- a/pkg/cfn/builder/iam.go +++ b/pkg/cfn/builder/iam.go @@ -126,6 +126,9 @@ func (c *ClusterResourceSet) addResourcesForServiceRole() { ), ManagedPolicyArns: gfnt.NewSlice(makePolicyARNs(managedPolicyARNs...)...), } + if c.spec.IsCustomEksEndpoint() { + role.AssumeRolePolicyDocument = createBetaAssumeRolePolicy() + } } if api.IsSetAndNonEmptyString(c.spec.IAM.ServiceRolePermissionsBoundary) { diff --git a/pkg/cfn/builder/managed_nodegroup.go b/pkg/cfn/builder/managed_nodegroup.go index 1edfef26af..f4d5f20869 100644 --- a/pkg/cfn/builder/managed_nodegroup.go +++ b/pkg/cfn/builder/managed_nodegroup.go @@ -220,7 +220,11 @@ func (m *ManagedNodeGroupResourceSet) AddAllResources(ctx context.Context) error } managedResource.LaunchTemplate = launchTemplate - m.newResource(ManagedNodeGroupResourceName, managedResource) + if m.clusterConfig.IsCustomEksEndpoint() { + m.newResource(ManagedNodeGroupResourceName, addBetaManagedNodeGroupResources(managedResource, m.clusterConfig.Metadata.Name)) + } else { + m.newResource(ManagedNodeGroupResourceName, managedResource) + } return nil } diff --git a/pkg/cfn/builder/nodegroup.go b/pkg/cfn/builder/nodegroup.go index 145903fbc4..da3f352dce 100644 --- a/pkg/cfn/builder/nodegroup.go +++ b/pkg/cfn/builder/nodegroup.go @@ -176,11 +176,17 @@ func (n *NodeGroupResourceSet) addAccessEntry() { return } - n.newResource("AccessEntry", &gfneks.AccessEntry{ - PrincipalArn: gfnt.MakeFnGetAttString(cfnIAMInstanceRoleName, "Arn"), - ClusterName: gfnt.NewString(n.options.ClusterConfig.Metadata.Name), - Type: gfnt.NewString(string(api.GetAccessEntryType(n.options.NodeGroup))), - }) + if n.options.ClusterConfig.IsCustomEksEndpoint() { + n.newResource("AccessEntry", + addBetaAccessEntry(n.options.ClusterConfig.Metadata.Name, + string(api.GetAccessEntryType(n.options.NodeGroup)))) + } else { + n.newResource("AccessEntry", &gfneks.AccessEntry{ + PrincipalArn: gfnt.MakeFnGetAttString(cfnIAMInstanceRoleName, "Arn"), + ClusterName: gfnt.NewString(n.options.ClusterConfig.Metadata.Name), + Type: gfnt.NewString(string(api.GetAccessEntryType(n.options.NodeGroup))), + }) + } } func (n *NodeGroupResourceSet) addResourcesForSecurityGroups() { diff --git a/pkg/cfn/builder/templates/beta-resources.yaml b/pkg/cfn/builder/templates/beta-resources.yaml new file mode 100644 index 0000000000..5e865a5db3 --- /dev/null +++ b/pkg/cfn/builder/templates/beta-resources.yaml @@ -0,0 +1,101 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: CloudFormation template to create an EKS cluster using a custom Lambda function with a unique name. + +Parameters: + EksEndpointUrl: + Type: String + Description: The endpoint URL for the EKS service + +Resources: + ControlPlane: + Type: Custom::EksCluster + Properties: + ServiceToken: !GetAtt CustomEKSFunction.Arn + RoleArn: !GetAtt ServiceRole.Arn + + CustomEKSFunction: + Type: AWS::Lambda::Function + Properties: + FunctionName: !Sub '${AWS::StackName}-CustomEKSFunction' + Handler: index.handler + Runtime: python3.13 + Timeout: 900 + Role: !GetAtt LambdaExecutionRole.Arn + Environment: + Variables: + AWS_ENDPOINT_URL_EKS: !Ref EksEndpointUrl + + LambdaExecutionRole: + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - arn:aws:iam::aws:policy/AmazonEKSBlockStoragePolicy + - arn:aws:iam::aws:policy/AmazonEKSClusterPolicy + - arn:aws:iam::aws:policy/AmazonEKSComputePolicy + - arn:aws:iam::aws:policy/AmazonEKSLoadBalancingPolicy + - arn:aws:iam::aws:policy/AmazonEKSNetworkingPolicy + - arn:aws:iam::aws:policy/AmazonEKSServicePolicy + - arn:aws:iam::aws:policy/AmazonEKSVPCResourceController + - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - "lambda.amazonaws.com" + - "ec2.amazonaws.com" + - "eks.amazonaws.com" + Action: sts:AssumeRole + Policies: + - PolicyName: EKSClusterCreationPolicy + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - eks:* + - ec2:* + - cloudformation:* + - iam:RemoveRoleFromInstanceProfile + Resource: '*' + - Effect: Allow + Action: + - iam:* + - sts:* + Resource: + - arn:aws:iam::*:role/*NodeInstanceRole* + - arn:aws:iam::*:role/*MasterInstanceRole* + - arn:aws:iam::*:role/*ServiceRole* + - arn:aws:iam::*:role/aws-service-role/eks* + - arn:aws:iam::*:oidc-provider/* + - PolicyName: CloudWatchLogsPolicy + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + Resource: '*' # Allow access to all log groups + + LambdaInvokePermission: + Type: AWS::Lambda::Permission + Properties: + Action: 'lambda:InvokeFunction' + FunctionName: !GetAtt CustomEKSFunction.Arn + Principal: 'cloudformation.amazonaws.com' + DependsOn: + - CustomEKSFunction # Explicit dependency on CustomEKSFunction + +Outputs: + ClusterArn: + Description: ARN of the created EKS cluster + Value: !GetAtt ControlPlane.PhysicalResourceId + Arn: + Description: ARN of the created EKS cluster + Value: !GetAtt ControlPlane.PhysicalResourceId + ClusterName: + Description: Name of the created EKS cluster + Value: !GetAtt ControlPlane.ClusterName diff --git a/pkg/cfn/builder/templates/beta.py b/pkg/cfn/builder/templates/beta.py new file mode 100644 index 0000000000..13626cb69b --- /dev/null +++ b/pkg/cfn/builder/templates/beta.py @@ -0,0 +1,424 @@ +import copy +import json +import boto3 +import os +import logging +import time +import cfnresponse # AWS-provided helper for sending responses + +# Set up logging +logger = logging.getLogger() +logger.setLevel(logging.INFO) + + +def init_eks_client(): + """ + Initializes and returns an EKS client using the endpoint URL from the environment variable. + """ + eks_endpoint = os.environ.get('AWS_ENDPOINT_URL_EKS') + if not eks_endpoint: + raise ValueError("AWS_ENDPOINT_URL_EKS environment variable is not set or is empty") + return boto3.client('eks', endpoint_url=eks_endpoint) + + +def validate_input(event): + """ + Validate that all required fields are present in the event. + """ + required_fields = ['Name', 'RoleArn', 'ResourcesVpcConfig', + 'IAMPrincipalArn', 'STSRoleArn'] + for field in required_fields: + if field not in event['ResourceProperties']: + raise ValueError(f"Missing required field: {field}") + + +def convert_keys_to_lowercase_first_letter(d): + """ + Convert the first character of dictionary keys to lowercase. + """ + if not isinstance(d, dict): + return d # Return as-is if it's not a dictionary + + new_dict = {} + for key, value in d.items(): + # Convert the first character of the key to lowercase + new_key = key[:1].lower() + key[1:] if key else key + + # Recursively process nested dictionaries + if isinstance(value, dict): + new_dict[new_key] = convert_keys_to_lowercase_first_letter(value) + # Process lists only if they contain dictionaries + elif isinstance(value, list): + new_dict[new_key] = [ + convert_keys_to_lowercase_first_letter(item) if isinstance(item, dict) else item + for item in value + ] + else: + new_dict[new_key] = value + return new_dict + + +def replace_boolean_strings(d): + """ + Replace string representations of booleans with actual booleans. + """ + if isinstance(d, (dict, list)): + iterable = d.items() if isinstance(d, dict) else enumerate(d) + for k, v in iterable: + if isinstance(v, str) and v.lower() in {"true", "false"}: + d[k] = v.lower() == "true" + elif isinstance(v, (dict, list)): + replace_boolean_strings(v) + + +def replace_integer_strings(d): + """ + Replace string representations of integers with actual integers. + """ + if isinstance(d, (dict, list)): + iterable = d.items() if isinstance(d, dict) else enumerate(d) + for k, v in iterable: + if isinstance(v, str) and v.isdigit(): + d[k] = int(v) + elif isinstance(v, (dict, list)): + replace_integer_strings(v) + + +def create_access_entry(eks_client, principal_arn, username, cluster_name, entry_type): + """ + Create an access entry for an IAM principal in an EKS cluster. + """ + logger.info(f"Creating access entry in EKS cluster: {cluster_name}") + params = { + 'clusterName': cluster_name, + 'principalArn': principal_arn, + } + if username is not None: + params['username'] = username + if entry_type is not None: + params['type'] = entry_type + + response1 = eks_client.create_access_entry(**params) + logger.info("Access entry called successfully:") + logger.info("Access entry response: " + json.dumps(response1, default=str)) + + if entry_type is not None and entry_type == "STANDARD": + # Associate the admin access policy + policy_arn = 'arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy' + params = { + 'clusterName': cluster_name, + 'principalArn': principal_arn, + 'policyArn': policy_arn, + 'accessScope': { + 'type': 'cluster', + 'namespaces': [] + } + } + response2 = eks_client.associate_access_policy(**params) + logger.info("Associate Access Policy called successfully:") + logger.info("Associate Access Policy response: " + json.dumps(response2, default=str)) + return response1 + + +def get_stack_tags(event): + """ + Extracts and returns the tags of a CloudFormation stack from the given event. + """ + stack_name = event['StackId'].split('/')[1] + cf = boto3.client('cloudformation') + response = cf.describe_stacks(StackName=stack_name) + stack_tags = response['Stacks'][0].get('Tags', []) + tag_dict = {tag['Key']: tag['Value'] for tag in stack_tags} + return tag_dict + + +def update_payload_tags(payload, event): + """ + Updates the payload with tags from the CloudFormation stack associated with the given event. + """ + # get the stack level tags + stack_tags = get_stack_tags(event) + logger.info("Stack Tags: " + json.dumps(stack_tags, default=str)) + # Add stack tags to the create_cluster_payload tags + if 'tags' not in payload: + payload['tags'] = {} + logger.info("Cluster Tags: " + json.dumps(payload['tags'], default=str)) + # Ensure 'tags' is a dictionary + if isinstance(payload['tags'], list): + tags_dict = {item['key']: item['value'] for item in payload['tags']} + payload['tags'] = tags_dict + payload['tags'].update(stack_tags) + logger.info("Final Tags: " + json.dumps(payload['tags'], default=str)) + + +def handler(event, context): + """ + Lambda function handler for CloudFormation custom resource. + """ + logger.info("Received event: " + json.dumps(event, default=str)) + + # Validate that the invocation is from CloudFormation + if 'RequestType' not in event: + raise ValueError("Invalid invocation source. This Lambda function can only be invoked by CloudFormation.") + + if event['ResourceType'] == 'Custom::EksCluster': + return cluster_handler(event, context) + + if event['ResourceType'] == 'Custom::EksManagedNodeGroup': + return nodegroup_handler(event, context) + + if event['ResourceType'] == 'Custom::EksAccessEntry': + return access_entry_handler(event, context) + + raise ValueError(f"Invalid resource type {event['ResourceType']}") + + +def cluster_handler(event, context): + """ + Handles the creation, update, and deletion of an EKS cluster as a CloudFormation custom resource. + """ + try: + eks_client = init_eks_client() + + cluster_name = event['ResourceProperties']['Name'] + + # Handle Delete event + if event['RequestType'] == 'Delete': + delete_cluster(eks_client, cluster_name) + cfnresponse.send(event, context, cfnresponse.SUCCESS, {"Message": "Resource deleted"}) + return { + 'PhysicalResourceId': event['PhysicalResourceId'] + } + + # Validate input + validate_input(event) + + iam_principal_arn = event['ResourceProperties']['IAMPrincipalArn'] + sts_role_arn = event['ResourceProperties']['STSRoleArn'] + + # Prepare the create_cluster request payload from the custom resource properties + create_cluster_payload = convert_keys_to_lowercase_first_letter( + copy.deepcopy(event['ResourceProperties'])) + + # cleanup create_cluster_payload as not all fields can be sent to EKS create_cluster API + del create_cluster_payload['serviceToken'] + del create_cluster_payload['iAMPrincipalArn'] + del create_cluster_payload['sTSRoleArn'] + replace_boolean_strings(create_cluster_payload) + + update_payload_tags(create_cluster_payload, event) + + # create and wait for the eks cluster + cluster_details, response = create_cluster(eks_client, cluster_name, create_cluster_payload) + + # Create an access entry for the EKS cluster + create_access_entry(eks_client, iam_principal_arn, sts_role_arn, cluster_name, "STANDARD") + + # Extract required attributes + eventData = { + "Arn": cluster_details['arn'], + "PhysicalResourceId": cluster_details['arn'], + "ClusterName": cluster_details['name'], + "ClusterSecurityGroupId": cluster_details['resourcesVpcConfig']['clusterSecurityGroupId'], + "CertificateAuthorityData": cluster_details['certificateAuthority']['data'], + "Endpoint": cluster_details['endpoint'], + } + + cfnresponse.send(event, context, cfnresponse.SUCCESS, + eventData) + # Return the cluster ARN as the PhysicalResourceId + return { + 'PhysicalResourceId': response['cluster']['arn'], + "ClusterName": response['cluster']['name'], + 'Data': json.dumps(response, default=str) + } + except Exception as e: + logger.error("Error: " + str(e)) + cfnresponse.send(event, context, cfnresponse.FAILED, {"Message": str(e)}) + return None + + +def create_cluster(eks_client, cluster_name, create_cluster_payload): + """ + Create the EKS cluster + """ + try: + # Check if the cluster already exists + logger.info(f"Checking if EKS cluster {cluster_name} already exists") + response = eks_client.describe_cluster(name=cluster_name) + except eks_client.exceptions.ResourceNotFoundException: + logger.info("Creating EKS cluster with payload: " + json.dumps(create_cluster_payload, default=str)) + response = eks_client.create_cluster(**create_cluster_payload) + logger.info("EKS cluster created: " + json.dumps(response, default=str)) + + # Wait for the cluster to become ACTIVE + cluster_details = wait_for_cluster_creation(eks_client, cluster_name) + return cluster_details, response + + +def wait_for_cluster_creation(eks_client, cluster_name): + """ + Wait for the EKS cluster to become ACTIVE. + """ + waiter = eks_client.get_waiter('cluster_active') + waiter.wait(name=cluster_name, + WaiterConfig={'Delay': 10, 'MaxAttempts': 100}) + response = eks_client.describe_cluster(name=cluster_name) + status = response['cluster']['status'] + if status == 'ACTIVE': + logger.info(f"EKS cluster {cluster_name} is now ACTIVE.") + return response['cluster'] + elif status == 'FAILED': + raise Exception(f"EKS cluster {cluster_name} creation failed.") + else: + raise Exception(f"EKS cluster {cluster_name} in {status} bad state.") + + +def delete_cluster(eks_client, cluster_name): + """ + Delete an EKS cluster. + """ + logger.info(f"Deleting EKS cluster: {cluster_name}") + eks_client.delete_cluster(name=cluster_name) + logger.info(f"EKS cluster deleted: {cluster_name}") + + +def nodegroup_handler(event, context): + """ + Handles the creation, update, and deletion of an EKS managed node group as a CloudFormation custom resource. + """ + try: + eks_client = init_eks_client() + iam_client = boto3.client('iam') + + cluster_name = event['ResourceProperties']['ClusterName'] + logger.info(f"cluster name : {cluster_name}") + + nodegroup_name = event['ResourceProperties']['NodegroupName'] + logger.info(f"nodegroup name : {nodegroup_name}") + + # Handle Delete event + if event['RequestType'] == 'Delete': + response = eks_client.describe_nodegroup( + clusterName=cluster_name, + nodegroupName=nodegroup_name + ) + nodegroup_role_arn = response['nodegroup']['nodeRole'] + print(f"IAM Role ARN associated with the EKS node group: {nodegroup_role_arn}") + + role_name = nodegroup_role_arn.split('/')[-1] + response = iam_client.list_instance_profiles_for_role(RoleName=role_name) + instance_profiles = response['InstanceProfiles'] + for profile in instance_profiles: + # Process each profile + instance_profile_name = profile['InstanceProfileName'] + logger.info(f"Processing instance profile: {instance_profile_name}") + + iam_client.remove_role_from_instance_profile( + InstanceProfileName=instance_profile_name, + RoleName=role_name + ) + print(f"Removed role {role_name} from instance profile: {instance_profile_name}") + + eks_client.delete_nodegroup(clusterName=cluster_name, nodegroupName=nodegroup_name) + cfnresponse.send(event, context, cfnresponse.SUCCESS, {"Message": "Resource deleted"}) + return { + 'PhysicalResourceId': event['PhysicalResourceId'] + } + + # Prepare the nodegroup request payload from the custom resource properties + nodegroup_payload = convert_keys_to_lowercase_first_letter( + copy.deepcopy(event['ResourceProperties'])) + del nodegroup_payload['serviceToken'] + replace_boolean_strings(nodegroup_payload) + replace_integer_strings(nodegroup_payload) + + update_payload_tags(nodegroup_payload, event) + + logger.info("EKS nodegroup with payload: " + json.dumps(nodegroup_payload, default=str)) + response = eks_client.create_nodegroup(**nodegroup_payload) + logger.info("EKS nodegroup created: " + json.dumps(response, default=str)) + + # Wait for the nodegroup to become ACTIVE + waiter = eks_client.get_waiter('nodegroup_active') + waiter.wait( + clusterName=cluster_name, + nodegroupName=nodegroup_name, + WaiterConfig={'Delay': 10, 'MaxAttempts': 100} + ) + + # Fetch the nodegroup details after it becomes ACTIVE + response = eks_client.describe_nodegroup( + clusterName=cluster_name, + nodegroupName=nodegroup_name + ) + + status = response['nodegroup']['status'] + if status != 'ACTIVE': + raise Exception(f"EKS node group {nodegroup_name} is not ACTIVE, status is {status}.") + + eventData = { + "Arn": response['nodegroup']['nodegroupArn'], + "PhysicalResourceId": response['nodegroup']['nodegroupArn'], + } + + cfnresponse.send(event, context, cfnresponse.SUCCESS, + eventData) + # Return the cluster ARN as the PhysicalResourceId + return { + 'PhysicalResourceId': response['nodegroup']['nodegroupArn'], + 'Data': json.dumps(response, default=str) + } + + except Exception as e: + logger.error("Error: " + str(e)) + cfnresponse.send(event, context, cfnresponse.FAILED, {"Message": str(e)}) + return None + + +def access_entry_handler(event, context): + """ + Handles the creation, update, and deletion of an EKS access entry as a CloudFormation custom resource. + """ + try: + eks_client = init_eks_client() + + cluster_name = event['ResourceProperties']['ClusterName'] + logger.info(f"cluster name : {cluster_name}") + + principal_arn = event['ResourceProperties']['PrincipalArn'] + logger.info(f"principal arn : {principal_arn}") + + # Handle Delete event + if event['RequestType'] == 'Delete': + logger.info(f"access entry principal arn : {principal_arn}") + + eks_client.delete_access_entry(clusterName=cluster_name, principalArn=principal_arn) + cfnresponse.send(event, context, cfnresponse.SUCCESS, {"Message": "Resource deleted"}) + return { + 'PhysicalResourceId': event['PhysicalResourceId'] + } + + username = event['ResourceProperties']['Username'] if 'Username' in event['ResourceProperties'] else None + logger.info(f"username : {username}") + + entry_type = event['ResourceProperties']['Type'] + logger.info(f"entry type : {entry_type}") + + response = create_access_entry(eks_client, principal_arn, username, cluster_name, entry_type) + logger.info("EKS access entry created: " + json.dumps(response, default=str)) + + eventData = { + "Arn": response['accessEntry']['accessEntryArn'], + "PhysicalResourceId": response['accessEntry']['accessEntryArn'], + } + cfnresponse.send(event, context, cfnresponse.SUCCESS, eventData) + # Return the cluster ARN as the PhysicalResourceId + return { + 'PhysicalResourceId': response['accessEntry']['accessEntryArn'], + 'Data': json.dumps(response, default=str) + } + except Exception as e: + logger.error("Error: " + str(e)) + cfnresponse.send(event, context, cfnresponse.FAILED, {"Message": str(e)}) + return None diff --git a/pkg/cfn/manager/api.go b/pkg/cfn/manager/api.go index d4cd9a2945..5bc1325ab5 100644 --- a/pkg/cfn/manager/api.go +++ b/pkg/cfn/manager/api.go @@ -83,6 +83,7 @@ type StackCollection struct { region string waitTimeout time.Duration sharedTags []types.Tag + stsAPI awsapi.STS } func newTag(key, value string) types.Tag { @@ -112,6 +113,7 @@ func NewStackCollection(provider api.ClusterProvider, spec *api.ClusterConfig) S roleARN: provider.CloudFormationRoleARN(), region: provider.Region(), waitTimeout: provider.WaitTimeout(), + stsAPI: provider.STS(), } } diff --git a/pkg/cfn/manager/cluster.go b/pkg/cfn/manager/cluster.go index 5007dff1a8..2df032353c 100644 --- a/pkg/cfn/manager/cluster.go +++ b/pkg/cfn/manager/cluster.go @@ -36,7 +36,7 @@ func (c *StackCollection) MakeClusterStackNameFromName(name string) string { func (c *StackCollection) createClusterTask(ctx context.Context, errs chan error, supportsManagedNodes bool) error { name := c.MakeClusterStackName() logger.Info("building cluster stack %q", name) - stack := builder.NewClusterResourceSet(c.ec2API, c.region, c.spec, nil, false) + stack := builder.NewClusterResourceSet(c.ec2API, c.stsAPI, c.region, c.spec, nil, false) if err := stack.AddAllResources(ctx); err != nil { return err } @@ -143,7 +143,7 @@ func (c *StackCollection) AppendNewClusterStackResource(ctx context.Context, ext } logger.Info("re-building cluster stack %q", name) - newStack := builder.NewClusterResourceSet(c.ec2API, c.region, c.spec, ¤tResources, extendForOutposts) + newStack := builder.NewClusterResourceSet(c.ec2API, c.stsAPI, c.region, c.spec, ¤tResources, extendForOutposts) if err := newStack.AddAllResources(ctx); err != nil { return false, err } diff --git a/pkg/eks/services_v2.go b/pkg/eks/services_v2.go index bc0d69178c..a94ecf753d 100644 --- a/pkg/eks/services_v2.go +++ b/pkg/eks/services_v2.go @@ -185,7 +185,7 @@ func (s *ServicesV2) EKS() awsapi.EKS { s.eks = eks.NewFromConfig(s.config, func(o *eks.Options) { o.BaseEndpoint = getBaseEndpoint(eks.ServiceID, []string{ "AWS_EKS_ENDPOINT", - "AWS_ENDPOINT_URL_EC2", + "AWS_ENDPOINT_URL_EKS", "AWS_ENDPOINT_URL", }) })