Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions pkg/tracejob/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,9 @@ func (nj *TraceJob) Job() *batchv1.Job {
RequiredDuringSchedulingIgnoredDuringExecution: &apiv1.NodeSelector{
NodeSelectorTerms: []apiv1.NodeSelectorTerm{
apiv1.NodeSelectorTerm{
MatchExpressions: []apiv1.NodeSelectorRequirement{
MatchFields: []apiv1.NodeSelectorRequirement{
apiv1.NodeSelectorRequirement{
Key: "kubernetes.io/hostname",
Key: "metadata.name",
Operator: apiv1.NodeSelectorOpIn,
Values: []string{nj.Target.Node},
},
Expand Down Expand Up @@ -611,23 +611,33 @@ func jobHostname(j batchv1.Job) (string, error) {
return "", fmt.Errorf("node selector terms are empty in node affinity for job")
}

me := nst[0].MatchExpressions

if len(me) == 0 {
return "", fmt.Errorf("node selector terms match expressions are empty in node affinity for job")
// Check MatchFields first (new approach)
mf := nst[0].MatchFields
if len(mf) > 0 {
for _, v := range mf {
if v.Key == "metadata.name" {
if len(v.Values) == 0 {
return "", fmt.Errorf("node name affinity found but no values in it for job")
}
return v.Values[0], nil
}
}
}

for _, v := range me {
if v.Key == "kubernetes.io/hostname" {
if len(v.Values) == 0 {
return "", fmt.Errorf("hostname affinity found but no values in it for job")
// Fallback to MatchExpressions for backward compatibility
me := nst[0].MatchExpressions
if len(me) > 0 {
for _, v := range me {
if v.Key == "kubernetes.io/hostname" {
if len(v.Values) == 0 {
return "", fmt.Errorf("hostname affinity found but no values in it for job")
}
return v.Values[0], nil
}

return v.Values[0], nil
}
}

return "", fmt.Errorf("hostname not found for job")
return "", fmt.Errorf("node name not found for job")
}

// TraceJobStatus is a label for the running status of a trace job at the current time.
Expand Down
97 changes: 97 additions & 0 deletions pkg/tracejob/job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"

batchv1 "k8s.io/api/batch/v1"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
)
Expand Down Expand Up @@ -69,3 +71,98 @@ func (j *jobSuite) TestCreateJobWithGoogleAppSecret() {
assert.Len(j.T(), joblist.Items[0].Spec.Template.Spec.Containers[0].Env, 1)
assert.Equal(j.T(), joblist.Items[0].Spec.Template.Spec.Containers[0].Env[0].Name, "GOOGLE_APPLICATION_CREDENTIALS")
}

// TestJobNodeSelectorUsesNodeName verifies that TraceJob.Job pins the job to
// its target node via a MatchFields term on "metadata.name" (the node object
// name) rather than the legacy "kubernetes.io/hostname" label, so scheduling
// works on clusters where the hostname label differs from the node name.
func (j *jobSuite) TestJobNodeSelectorUsesNodeName() {
	const (
		testJobName  = "test-node-selector"
		testNodeName = "ip-10-0-1-123.ec2.internal"
	)
	tj := TraceJob{
		Name: testJobName,
		Target: TraceJobTarget{
			Node: testNodeName,
		},
	}

	job := tj.Job()

	// The full affinity chain must be populated; a nil anywhere means the
	// job would not be pinned to a node at all.
	assert.NotNil(j.T(), job.Spec.Template.Spec.Affinity)
	assert.NotNil(j.T(), job.Spec.Template.Spec.Affinity.NodeAffinity)
	assert.NotNil(j.T(), job.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution)

	nodeSelectorTerms := job.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
	assert.Len(j.T(), nodeSelectorTerms, 1)

	matchFields := nodeSelectorTerms[0].MatchFields
	assert.Len(j.T(), matchFields, 1)

	assert.Equal(j.T(), "metadata.name", matchFields[0].Key)
	// Compare against the typed constant instead of round-tripping through a
	// string conversion.
	assert.Equal(j.T(), apiv1.NodeSelectorOpIn, matchFields[0].Operator)
	assert.Len(j.T(), matchFields[0].Values, 1)
	assert.Equal(j.T(), testNodeName, matchFields[0].Values[0])
}

// TestJobHostnameExtraction verifies that jobHostname resolves the target
// node name both from the new MatchFields ("metadata.name") node affinity
// and, for backward compatibility, from the legacy MatchExpressions
// ("kubernetes.io/hostname") node affinity.
func (j *jobSuite) TestJobHostnameExtraction() {
	const testNodeName = "ip-10-0-1-123.ec2.internal"

	// New-style job: node pinned via MatchFields on metadata.name.
	nodeName, err := jobHostname(jobWithNodeSelectorTerm(apiv1.NodeSelectorTerm{
		MatchFields: []apiv1.NodeSelectorRequirement{
			{
				Key:      "metadata.name",
				Operator: apiv1.NodeSelectorOpIn,
				Values:   []string{testNodeName},
			},
		},
	}))
	assert.NoError(j.T(), err)
	assert.Equal(j.T(), testNodeName, nodeName)

	// Legacy job: node pinned via MatchExpressions on kubernetes.io/hostname.
	hostname, err := jobHostname(jobWithNodeSelectorTerm(apiv1.NodeSelectorTerm{
		MatchExpressions: []apiv1.NodeSelectorRequirement{
			{
				Key:      "kubernetes.io/hostname",
				Operator: apiv1.NodeSelectorOpIn,
				Values:   []string{"ip-10-0-1-123"},
			},
		},
	}))
	assert.NoError(j.T(), err)
	assert.Equal(j.T(), "ip-10-0-1-123", hostname)
}

// jobWithNodeSelectorTerm builds a minimal batch Job whose pod template has a
// required node affinity consisting of the single given selector term.
func jobWithNodeSelectorTerm(term apiv1.NodeSelectorTerm) batchv1.Job {
	return batchv1.Job{
		Spec: batchv1.JobSpec{
			Template: apiv1.PodTemplateSpec{
				Spec: apiv1.PodSpec{
					Affinity: &apiv1.Affinity{
						NodeAffinity: &apiv1.NodeAffinity{
							RequiredDuringSchedulingIgnoredDuringExecution: &apiv1.NodeSelector{
								NodeSelectorTerms: []apiv1.NodeSelectorTerm{term},
							},
						},
					},
				},
			},
		},
	}
}
7 changes: 1 addition & 6 deletions pkg/tracejob/selected_target.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,7 @@ func ResolveTraceJobTarget(clientset kubernetes.Interface, resource, container,
return nil, errors.NewErrorInvalid(fmt.Sprintf("Failed to locate a node for %s %v", resourceID, err))
}

labels := node.GetLabels()
val, ok := labels["kubernetes.io/hostname"]
if !ok {
return nil, errors.NewErrorInvalid("label kubernetes.io/hostname not found in node")
}
target.Node = val
target.Node = node.Name

case "pod":
podClient := clientset.CoreV1().Pods(targetNamespace)
Expand Down
198 changes: 198 additions & 0 deletions pkg/tracejob/selected_target_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
package tracejob

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
)

// selectedTargetSuite groups the ResolveTraceJobTarget tests and holds the
// fake Kubernetes clientset each test runs against.
type selectedTargetSuite struct {
	suite.Suite
	// clientset is a fresh fake clientset, recreated in SetupTest so state
	// created by one test cannot leak into another.
	clientset *fake.Clientset
}

// TestSelectedTargetSuite wires selectedTargetSuite into the standard
// "go test" runner.
func TestSelectedTargetSuite(t *testing.T) {
	suite.Run(t, new(selectedTargetSuite))
}

// SetupTest gives every test a pristine fake clientset so objects created by
// one test are not visible to the next.
func (s *selectedTargetSuite) SetupTest() {
	cs := fake.NewSimpleClientset()
	s.clientset = cs
}

// TestResolveNodeTargetUsesNodeName verifies that resolving a node target
// yields the node object's name (which may be fully qualified) rather than
// the value of its kubernetes.io/hostname label.
func (s *selectedTargetSuite) TestResolveNodeTargetUsesNodeName() {
	const (
		testNodeName      = "ip-10-0-1-123.ec2.internal"
		testHostnameLabel = "ip-10-0-1-123" // deliberately differs from the node name
	)

	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: testNodeName,
			Labels: map[string]string{
				"kubernetes.io/hostname": testHostnameLabel,
			},
		},
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{
				v1.ResourcePods: resource.MustParse("110"),
			},
		},
	}

	_, err := s.clientset.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
	assert.NoError(s.T(), err)

	target, err := ResolveTraceJobTarget(s.clientset, testNodeName, "", "")

	assert.NoError(s.T(), err)
	assert.NotNil(s.T(), target)
	// The target must carry the node name, not the hostname label.
	assert.Equal(s.T(), testNodeName, target.Node)
}

// TestResolvePodTargetUsesNodeName verifies that resolving a pod target
// reports the node name taken from pod.Spec.NodeName, along with the pod UID
// and the container ID stripped of its runtime ("docker://") prefix.
func (s *selectedTargetSuite) TestResolvePodTargetUsesNodeName() {
	const testNodeName = "ip-10-0-1-123.ec2.internal"

	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: testNodeName,
			Labels: map[string]string{
				"kubernetes.io/hostname": "ip-10-0-1-123",
			},
		},
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{
				v1.ResourcePods: resource.MustParse("110"),
			},
		},
	}

	_, err := s.clientset.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
	assert.NoError(s.T(), err)

	// Pod scheduled on the node above; its container status carries a
	// runtime-prefixed container ID.
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-pod",
			Namespace: "default",
			UID:       "test-pod-uid",
		},
		Spec: v1.PodSpec{
			NodeName: testNodeName,
			Containers: []v1.Container{
				{
					Name: "test-container",
				},
			},
		},
		Status: v1.PodStatus{
			ContainerStatuses: []v1.ContainerStatus{
				{
					Name:        "test-container",
					ContainerID: "docker://abc123",
				},
			},
		},
	}

	_, err = s.clientset.CoreV1().Pods("default").Create(context.TODO(), pod, metav1.CreateOptions{})
	assert.NoError(s.T(), err)

	target, err := ResolveTraceJobTarget(s.clientset, "pod/test-pod", "test-container", "default")

	assert.NoError(s.T(), err)
	assert.NotNil(s.T(), target)
	// Node comes from pod.Spec.NodeName, not from any hostname label.
	assert.Equal(s.T(), testNodeName, target.Node)
	assert.Equal(s.T(), "test-pod-uid", target.PodUID)
	// The "docker://" prefix must be stripped from the container ID.
	assert.Equal(s.T(), "abc123", target.ContainerID)
}

// TestResolveDeploymentTargetUsesNodeName verifies that resolving a
// deployment target locates a pod matching the deployment's label selector
// and reports that pod's node name (from pod.Spec.NodeName), pod UID, and
// runtime-prefix-stripped container ID.
func (s *selectedTargetSuite) TestResolveDeploymentTargetUsesNodeName() {
	const testNodeName = "ip-10-0-1-123.ec2.internal"

	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: testNodeName,
			Labels: map[string]string{
				"kubernetes.io/hostname": "ip-10-0-1-123",
			},
		},
		Status: v1.NodeStatus{
			Allocatable: v1.ResourceList{
				v1.ResourcePods: resource.MustParse("110"),
			},
		},
	}

	_, err := s.clientset.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
	assert.NoError(s.T(), err)

	// Deployment whose selector the test pod below matches.
	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-deployment",
			Namespace: "default",
		},
		Spec: appsv1.DeploymentSpec{
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app": "test-app",
				},
			},
		},
	}

	_, err = s.clientset.AppsV1().Deployments("default").Create(context.TODO(), deployment, metav1.CreateOptions{})
	assert.NoError(s.T(), err)

	// Pod carrying the deployment's selector labels, scheduled on the node.
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-deployment-pod",
			Namespace: "default",
			UID:       "test-pod-uid",
			Labels: map[string]string{
				"app": "test-app",
			},
		},
		Spec: v1.PodSpec{
			NodeName: testNodeName,
			Containers: []v1.Container{
				{
					Name: "test-container",
				},
			},
		},
		Status: v1.PodStatus{
			ContainerStatuses: []v1.ContainerStatus{
				{
					Name:        "test-container",
					ContainerID: "docker://def456",
				},
			},
		},
	}

	_, err = s.clientset.CoreV1().Pods("default").Create(context.TODO(), pod, metav1.CreateOptions{})
	assert.NoError(s.T(), err)

	target, err := ResolveTraceJobTarget(s.clientset, "deployment/test-deployment", "test-container", "default")

	assert.NoError(s.T(), err)
	assert.NotNil(s.T(), target)
	// Node comes from pod.Spec.NodeName, not from any hostname label.
	assert.Equal(s.T(), testNodeName, target.Node)
	assert.Equal(s.T(), "test-pod-uid", target.PodUID)
	// The "docker://" prefix must be stripped from the container ID.
	assert.Equal(s.T(), "def456", target.ContainerID)
}