Skip to content

Commit de62999

Browse files
Automated AMI management and Upgrade locking
Automated AMI management: InstanceGroups can now set the image configuration to "latest". This results in the AMI value being retrieved from an SSM parameter (https://docs.aws.amazon.com/eks/latest/userguide/retrieve-ami-id.html). This ensures that nodes within an InstanceGroup are kept up to date, which is especially useful in development clusters. Automated AMI management supports retrieving Amazon AMIs for Amazon Linux 2, Bottlerocket and Windows nodes. The OS family can be configured using the annotation `instancemgr.keikoproj.io/os-family`. Upgrade locking: InstanceGroups can now set the annotation `instancemgr.keikoproj.io/lock-upgrades="true"`, which prevents the InstanceGroup from entering the InitUpgrade state. This is useful for controlling when the nodes of an InstanceGroup can be upgraded, and pairs well with the automated AMI management feature. Resolves: 320 Signed-off-by: Sebastian Cole <[email protected]>
1 parent 19de2db commit de62999

34 files changed

+794
-74
lines changed

.github/DEVELOPER.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ You can also run `make coverage` to generate a coverage report.
5555

5656
## Running BDD tests
5757

58+
### Dependencies
59+
60+
1. You will need an existing EKS cluster running with the connection details exported into a kube config file.
61+
2. [Keikoproj Minion-Manager](https://github.com/keikoproj/minion-manager) must also be running in the cluster
62+
3. Instance Manager needs to be started outside of the bdd test suite
63+
64+
5865
Export some variables and run `make bdd` to run a functional e2e test.
5966

6067
### Example
@@ -96,3 +103,5 @@ testing: warning: no tests to run
96103
PASS
97104
ok github.com/keikoproj/instance-manager/test-bdd 1362.336s [no tests to run]
98105
```
106+
107+
Note: If your test cluster uses `InstanceGroups` to run core components, annotating the namespace with `instancemgr.keikoproj.io/config-excluded="true"` can help prevent unexpected disruption.

api/v1alpha1/instancegroup_types.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ const (
4747
ReconcileModified ReconcileState = "ReconcileModified"
4848

4949
// End States
50-
ReconcileReady ReconcileState = "Ready"
51-
ReconcileErr ReconcileState = "Error"
50+
ReconcileLocked ReconcileState = "Locked"
51+
ReconcileReady ReconcileState = "Ready"
52+
ReconcileErr ReconcileState = "Error"
5253

5354
// Userdata bootstrap stages
5455
PreBootstrapStage = "PreBootstrap"
@@ -76,6 +77,8 @@ const (
7677
HostPlacementTenancyType = "host"
7778
DefaultPlacementTenancyType = "default"
7879
DedicatedPlacementTenancyType = "dedicated"
80+
81+
ImageLatestValue = "latest"
7982
)
8083

8184
type ContainerRuntime string
@@ -87,6 +90,8 @@ const (
8790

8891
DockerRuntime ContainerRuntime = "dockerd"
8992
ContainerDRuntime ContainerRuntime = "containerd"
93+
94+
UpgradeLockedAnnotationKey = "instancemgr.keikoproj.io/lock-upgrades"
9095
)
9196

9297
var (
@@ -392,6 +397,15 @@ func (ig *InstanceGroup) GetUpgradeStrategy() *AwsUpgradeStrategy {
392397
func (ig *InstanceGroup) SetUpgradeStrategy(strategy AwsUpgradeStrategy) {
393398
ig.Spec.AwsUpgradeStrategy = strategy
394399
}
400+
func (ig *InstanceGroup) Locked() bool {
401+
annotations := ig.GetAnnotations()
402+
if val, ok := annotations[UpgradeLockedAnnotationKey]; ok {
403+
if strings.EqualFold(val, "true") {
404+
return true
405+
}
406+
}
407+
return false
408+
}
395409

396410
func (s *EKSSpec) Validate() error {
397411
var (
@@ -521,7 +535,6 @@ func (c *EKSConfiguration) Validate() error {
521535
c.SuspendedProcesses = processes
522536
}
523537

524-
525538
if c.BootstrapOptions != nil {
526539
if c.BootstrapOptions.ContainerRuntime != "" && !contains(AllowedContainerRuntimes, c.BootstrapOptions.ContainerRuntime) {
527540
return errors.Errorf("validation failed, 'bootstrapOptions.containerRuntime' must be one of %+v", AllowedContainerRuntimes)

api/v1alpha1/instancegroup_types_test.go

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"testing"
1919

2020
"github.com/aws/aws-sdk-go/aws"
21+
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122
)
2223

2324
type EksUnitTest struct {
@@ -117,13 +118,13 @@ func TestInstanceGroupSpecValidate(t *testing.T) {
117118
MinSize: 1,
118119
Type: "LaunchTemplate",
119120
EKSConfiguration: &EKSConfiguration{
120-
BootstrapOptions: &BootstrapOptions{ContainerRuntime: "foo"},
121-
EksClusterName: "my-eks-cluster",
122-
NodeSecurityGroups: []string{"sg-123456789"},
123-
Image: "ami-12345",
124-
InstanceType: "m5.large",
125-
KeyPairName: "thisShouldBeOptional",
126-
Subnets: []string{"subnet-1111111", "subnet-222222"},
121+
BootstrapOptions: &BootstrapOptions{ContainerRuntime: "foo"},
122+
EksClusterName: "my-eks-cluster",
123+
NodeSecurityGroups: []string{"sg-123456789"},
124+
Image: "ami-12345",
125+
InstanceType: "m5.large",
126+
KeyPairName: "thisShouldBeOptional",
127+
Subnets: []string{"subnet-1111111", "subnet-222222"},
127128
},
128129
}, nil, nil),
129130
},
@@ -353,6 +354,41 @@ func TestInstanceGroupSpecValidate(t *testing.T) {
353354
}
354355
}
355356

357+
func TestLockedAnnotation(t *testing.T) {
358+
tests := []struct {
359+
name string
360+
annotation string
361+
expected bool
362+
}{
363+
{
364+
name: "Locked",
365+
annotation: "true",
366+
expected: true,
367+
},
368+
{
369+
name: "Unlocked",
370+
annotation: "false",
371+
expected: false,
372+
},
373+
}
374+
375+
for _, test := range tests {
376+
t.Run(test.name, func(t *testing.T) {
377+
testIg := &InstanceGroup{
378+
ObjectMeta: v1.ObjectMeta{
379+
Annotations: map[string]string{
380+
UpgradeLockedAnnotationKey: test.annotation,
381+
},
382+
},
383+
}
384+
res := testIg.Locked()
385+
if res != test.expected {
386+
t.Errorf("%v: got %v, expected %v", test.name, res, test.expected)
387+
}
388+
})
389+
}
390+
}
391+
356392
func basicFargateSpec() *EKSFargateSpec {
357393
return &EKSFargateSpec{
358394
ClusterName: "",

controllers/instancegroup_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ func (r *InstanceGroupReconciler) IsNamespaceAnnotated(namespace, key, value str
246246
}
247247

248248
annotations := unstructuredNamespace.GetAnnotations()
249-
if kubeprovider.HasAnnotation(annotations, key, value) {
249+
if kubeprovider.HasAnnotationWithValue(annotations, key, value) {
250250
return true
251251
}
252252
}

controllers/interface.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ type CloudDeployer interface {
1616
GetState() v1alpha.ReconcileState // Gets the current state type of the instance group
1717
SetState(v1alpha.ReconcileState) // Sets the current state of the instance group
1818
IsReady() bool // Returns true if state is Ready
19+
Locked() bool // Returns true if instanceGroup is locked
1920
}
2021

2122
func HandleReconcileRequest(d CloudDeployer) error {
@@ -54,6 +55,11 @@ func HandleReconcileRequest(d CloudDeployer) error {
5455

5556
// CRUD Nodes Upgrade Strategy
5657
if d.GetState() == v1alpha.ReconcileInitUpgrade {
58+
// Locked
59+
if d.Locked() {
60+
d.SetState(v1alpha.ReconcileLocked)
61+
return nil
62+
}
5763
err = d.UpgradeNodes()
5864
if err != nil {
5965
return err
@@ -67,12 +73,18 @@ func HandleReconcileRequest(d CloudDeployer) error {
6773

6874
// Bootstrap Nodes
6975
if d.IsReady() {
76+
7077
err = d.BootstrapNodes()
7178
if err != nil {
7279
return err
7380
}
7481

7582
if d.GetState() == v1alpha.ReconcileInitUpgrade {
83+
// Locked
84+
if d.Locked() {
85+
d.SetState(v1alpha.ReconcileLocked)
86+
return nil
87+
}
7688
err = d.UpgradeNodes()
7789
if err != nil {
7890
return err

controllers/providers/aws/aws.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
3131
"github.com/aws/aws-sdk-go/service/eks/eksiface"
3232
"github.com/aws/aws-sdk-go/service/iam/iamiface"
33+
"github.com/aws/aws-sdk-go/service/ssm/ssmiface"
3334
"github.com/pkg/errors"
3435
ctrl "sigs.k8s.io/controller-runtime"
3536
)
@@ -55,6 +56,7 @@ const (
5556
DescribeLaunchTemplateVersionsTTL time.Duration = 60 * time.Second
5657
DescribeInstanceTypesTTL time.Duration = 24 * time.Hour
5758
DescribeInstanceTypeOfferingTTL time.Duration = 1 * time.Hour
59+
GetParameterTTL time.Duration = 1 * time.Hour
5860

5961
CacheBackgroundPruningInterval time.Duration = 1 * time.Hour
6062
CacheMaxItems int64 = 250
@@ -117,6 +119,7 @@ type AwsWorker struct {
117119
EksClient eksiface.EKSAPI
118120
IamClient iamiface.IAMAPI
119121
Ec2Client ec2iface.EC2API
122+
SsmClient ssmiface.SSMAPI
120123
Ec2Metadata *ec2metadata.EC2Metadata
121124
Parameters map[string]interface{}
122125
}
@@ -246,10 +249,9 @@ func GetScalingConfigName(group *autoscaling.Group) string {
246249
}
247250

248251
func GetInstanceTypeNetworkInfo(instanceTypes []*ec2.InstanceTypeInfo, instanceType string) *ec2.NetworkInfo {
249-
for _, instanceTypeInfo := range instanceTypes {
250-
if aws.StringValue(instanceTypeInfo.InstanceType) == instanceType {
251-
return instanceTypeInfo.NetworkInfo
252-
}
252+
i := GetInstanceTypeInfo(instanceTypes, instanceType)
253+
if i != nil {
254+
return i.NetworkInfo
253255
}
254256
return nil
255257
}
@@ -262,3 +264,11 @@ func GetInstanceTypeInfo(instanceTypes []*ec2.InstanceTypeInfo, instanceType str
262264
}
263265
return nil
264266
}
267+
268+
func GetInstanceTypeArchitectures(instanceTypes []*ec2.InstanceTypeInfo, instanceType string) []string {
269+
i := GetInstanceTypeInfo(instanceTypes, instanceType)
270+
if i != nil {
271+
return aws.StringValueSlice((*i).ProcessorInfo.SupportedArchitectures)
272+
}
273+
return nil
274+
}

controllers/providers/aws/ssm.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package aws
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/aws/aws-sdk-go/aws"
7+
"github.com/aws/aws-sdk-go/aws/request"
8+
"github.com/aws/aws-sdk-go/aws/session"
9+
"github.com/aws/aws-sdk-go/service/ssm"
10+
"github.com/aws/aws-sdk-go/service/ssm/ssmiface"
11+
"github.com/keikoproj/aws-sdk-go-cache/cache"
12+
"github.com/keikoproj/instance-manager/controllers/common"
13+
)
14+
15+
// architectureMap maps a processor architecture name (e.g. "x86_64", "arm64")
// to an SSM parameter path format string.
type architectureMap map[string]string

// SSM parameter path formats for EKS-optimized images. Each contains a single
// %s placeholder; GetEksLatestAmi fills it with the Kubernetes version.
const (
	// EksOptimisedAmiPath is the Amazon Linux 2 (x86_64) image parameter.
	EksOptimisedAmiPath = "/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id"
	// EksOptimisedAmazonLinux2Arm64 is the Amazon Linux 2 (arm64) image parameter.
	EksOptimisedAmazonLinux2Arm64 = "/aws/service/eks/optimized-ami/%s/amazon-linux-2-arm64/recommended/image_id"
	// EksOptimisedBottlerocket is the Bottlerocket (x86_64) image parameter.
	EksOptimisedBottlerocket = "/aws/service/bottlerocket/aws-k8s-%s/x86_64/latest/image_id"
	// EksOptimisedBottlerocketArm64 is the Bottlerocket (arm64) image parameter.
	EksOptimisedBottlerocketArm64 = "/aws/service/bottlerocket/aws-k8s-%s/arm64/latest/image_id"
	// EksOptimisedWindowsCore is the Windows Server 2019 Core image parameter.
	EksOptimisedWindowsCore = "/aws/service/ami-windows-latest/Windows_Server-2019-English-Core-EKS_Optimized-%s/image_id"
	// EksOptimisedWindowsFull is the Windows Server 2019 Full image parameter.
	// It is not referenced by EksAmis.
	EksOptimisedWindowsFull = "/aws/service/ami-windows-latest/Windows_Server-2019-English-Full-EKS_Optimized-%s/image_id"
)

var (
	// EksAmis maps an OS family to the SSM parameter path format for each
	// architecture supported for that family.
	EksAmis = map[string]architectureMap{
		"amazonlinux2": {
			"x86_64": EksOptimisedAmiPath,
			"arm64":  EksOptimisedAmazonLinux2Arm64,
		},
		"bottlerocket": {
			"x86_64": EksOptimisedBottlerocket,
			"arm64":  EksOptimisedBottlerocketArm64,
		},
		"windows": {
			"x86_64": EksOptimisedWindowsCore,
		},
	}
)
41+
42+
// GetAwsSsmClient builds an SSM API client for the given region.
//
// The client's session is configured with verbose credential-chain errors and
// a retryer created by NewRetryLogger(maxRetries, collector). Caching is added
// to the session using cacheCfg, and the cache TTL for the "ssm" service's
// "GetParameter" operation is set to GetParameterTTL. A Complete handler is
// pushed to the front of the session handlers to log, at verbosity 1, whether
// each call was a cache hit along with its service and operation name.
//
// It panics if the session cannot be created.
func GetAwsSsmClient(region string, cacheCfg *cache.Config, maxRetries int, collector *common.MetricsCollector) ssmiface.SSMAPI {
43+
config := aws.NewConfig().WithRegion(region).WithCredentialsChainVerboseErrors(true)
44+
config = request.WithRetryer(config, NewRetryLogger(maxRetries, collector))
45+
sess, err := session.NewSession(config)
46+
if err != nil {
47+
panic(err)
48+
}
49+
cache.AddCaching(sess, cacheCfg)
50+
cacheCfg.SetCacheTTL("ssm", "GetParameter", GetParameterTTL)
51+
sess.Handlers.Complete.PushFront(func(r *request.Request) {
52+
ctx := r.HTTPRequest.Context()
53+
log.V(1).Info("AWS API call",
54+
"cacheHit", cache.IsCacheHit(ctx),
55+
"service", r.ClientInfo.ServiceName,
56+
"operation", r.Operation.Name,
57+
)
58+
})
59+
return ssm.New(sess)
60+
}
61+
62+
func (w *AwsWorker) GetEksLatestAmi(OSFamily string, arch string, kubernetesVersion string) (string, error) {
63+
input := &ssm.GetParameterInput{
64+
Name: aws.String(fmt.Sprintf(EksAmis[OSFamily][arch], kubernetesVersion)),
65+
}
66+
67+
output, err := w.SsmClient.GetParameter(input)
68+
if err != nil {
69+
return "", err
70+
}
71+
return aws.StringValue(output.Parameter.Value), nil
72+
}

controllers/providers/kubernetes/crd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ func GetResources(kube dynamic.Interface, instanceGroup *v1alpha1.InstanceGroup,
329329

330330
annotations := ru.GetAnnotations()
331331

332-
if HasAnnotation(annotations, OwnershipAnnotationKey, OwnershipAnnotationValue) && HasAnnotation(annotations, ScopeAnnotationKey, status.GetActiveScalingGroupName()) {
332+
if HasAnnotationWithValue(annotations, OwnershipAnnotationKey, OwnershipAnnotationValue) && HasAnnotationWithValue(annotations, ScopeAnnotationKey, status.GetActiveScalingGroupName()) {
333333
if IsPathValue(ru, statusJSONPath, completedStatus) || IsPathValue(ru, statusJSONPath, errorStatus) {
334334
// if resource is not completed and not failed, it must be still active
335335
inactiveResources = append(inactiveResources, ru)

controllers/providers/kubernetes/utils.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,14 @@ func AddAnnotation(u *unstructured.Unstructured, key, value string) {
116116
u.SetAnnotations(annotations)
117117
}
118118

119-
func HasAnnotation(annotations map[string]string, key, value string) bool {
119+
// HasAnnotation reports whether key is present in annotations, regardless of
// the value stored under it. A nil map contains no keys.
func HasAnnotation(annotations map[string]string, key string) bool {
	_, ok := annotations[key]
	return ok
}
125+
126+
func HasAnnotationWithValue(annotations map[string]string, key, value string) bool {
120127
if val, ok := annotations[key]; ok {
121128
if strings.EqualFold(val, value) {
122129
return true

0 commit comments

Comments
 (0)