Skip to content

Commit 3e382f0

Browse files
committed
Only mark PipelineRun as cancelled if actually cancelled 🛑
While working on #2369 (flakey tests around cancellation that actually revealed underlying bugs) we were running into a case where trying to cancel a PipelineRun's TaskRuns was failing (due to a race condition). In that case, the PipelineRun would be marked as cancelled and done, even though the PipelineRun was actually still executing in that one or more TaskRuns were actually still running. Now when that happens we will indicate that the PipelineRun is still running and return an error, which will mean that on the next reconcile, the Reconciler will try to reconcile again, and the PipelineRun conditions will reflect what is actually happening. Co-authored-by: Sharon Jerop Kipruto <[email protected]> Fixes #2381
1 parent 1a37fdb commit 3e382f0

File tree

3 files changed

+84
-12
lines changed

3 files changed

+84
-12
lines changed

‎pkg/reconciler/pipelinerun/cancel.go‎

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,6 @@ import (
3636

3737
// cancelPipelineRun marks the PipelineRun as cancelled and any resolved TaskRun(s) too.
3838
func cancelPipelineRun(logger *zap.SugaredLogger, pr *v1alpha1.PipelineRun, pipelineState []*resources.ResolvedPipelineRunTask, clientSet clientset.Interface) error {
39-
pr.Status.SetCondition(&apis.Condition{
40-
Type: apis.ConditionSucceeded,
41-
Status: corev1.ConditionFalse,
42-
Reason: "PipelineRunCancelled",
43-
Message: fmt.Sprintf("PipelineRun %q was cancelled", pr.Name),
44-
})
45-
// update pr completed time
46-
pr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
4739
errs := []string{}
4840
for _, rprt := range pipelineState {
4941
if rprt.TaskRun == nil {
@@ -65,8 +57,26 @@ func cancelPipelineRun(logger *zap.SugaredLogger, pr *v1alpha1.PipelineRun, pipe
6557
continue
6658
}
6759
}
68-
if len(errs) > 0 {
69-
return fmt.Errorf("error(s) from cancelling TaskRun(s) from PipelineRun %s: %s", pr.Name, strings.Join(errs, "\n"))
60+
// If we successfully cancelled all the TaskRuns, we can consider the PipelineRun cancelled.
61+
if len(errs) == 0 {
62+
pr.Status.SetCondition(&apis.Condition{
63+
Type: apis.ConditionSucceeded,
64+
Status: corev1.ConditionFalse,
65+
Reason: ReasonCancelled,
66+
Message: fmt.Sprintf("PipelineRun %q was cancelled", pr.Name),
67+
})
68+
// update pr completed time
69+
pr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
70+
} else {
71+
e := strings.Join(errs, "\n")
72+
// Indicate that we failed to cancel the PipelineRun
73+
pr.Status.SetCondition(&apis.Condition{
74+
Type: apis.ConditionSucceeded,
75+
Status: corev1.ConditionUnknown,
76+
Reason: ReasonCouldntCancel,
77+
Message: fmt.Sprintf("PipelineRun %q was cancelled but had errors trying to cancel TaskRuns: %s", pr.Name, e),
78+
})
79+
return fmt.Errorf("error(s) from cancelling TaskRun(s) from PipelineRun %s: %s", pr.Name, e)
7080
}
7181
return nil
7282
}

‎pkg/reconciler/pipelinerun/pipelinerun.go‎

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ const (
7878
// ReasonInvalidGraph indicates that the reason for the failure status is that the
7979
// associated Pipeline is an invalid graph (a.k.a wrong order, cycle, …)
8080
ReasonInvalidGraph = "PipelineInvalidGraph"
81+
// ReasonCancelled indicates that a PipelineRun was cancelled.
82+
ReasonCancelled = "PipelineRunCancelled"
83+
// ReasonCouldntCancel indicates that a PipelineRun was cancelled but attempting to update
84+
// all of the running TaskRuns as cancelled failed.
85+
ReasonCouldntCancel = "PipelineRunCouldntCancel"
86+
8187
// pipelineRunAgentName defines logging agent name for PipelineRun Controller
8288
pipelineRunAgentName = "pipeline-controller"
8389

@@ -522,7 +528,7 @@ func (c *Reconciler) reconcile(ctx context.Context, pr *v1alpha1.PipelineRun) er
522528
}
523529
}
524530

525-
// If the pipelinerun is cancelled, cancel tasks and update status
531+
// If the running pipelinerun is cancelled, cancel tasks and update status
526532
if pr.IsCancelled() {
527533
before := pr.Status.GetCondition(apis.ConditionSucceeded)
528534
err := cancelPipelineRun(c.Logger, pr, pipelineState, c.PipelineClientSet)

‎pkg/reconciler/pipelinerun/pipelinerun_test.go‎

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ import (
3737
test "github.com/tektoncd/pipeline/test/v1alpha1"
3838
corev1 "k8s.io/api/core/v1"
3939
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
40+
"k8s.io/apimachinery/pkg/runtime"
41+
k8stesting "k8s.io/client-go/testing"
4042
ktesting "k8s.io/client-go/testing"
4143
"knative.dev/pkg/apis"
4244
duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1"
@@ -1000,6 +1002,60 @@ func TestReconcileWithoutPVC(t *testing.T) {
10001002
}
10011003
}
10021004

1005+
func TestReconcileCancelledFailsTaskRunCancellation(t *testing.T) {
1006+
names.TestingSeed()
1007+
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline", "foo", tb.PipelineSpec(
1008+
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(1)),
1009+
))}
1010+
prs := []*v1alpha1.PipelineRun{tb.PipelineRun("test-pipeline-run-with-timeout", "foo",
1011+
tb.PipelineRunSpec("test-pipeline",
1012+
tb.PipelineRunCancelled,
1013+
),
1014+
)}
1015+
ts := []*v1alpha1.Task{tb.Task("hello-world", "foo")}
1016+
tr := []*v1alpha1.TaskRun{tb.TaskRun("test-pipeline-run-with-timeout-hello-world-1-9l9zj", "foo",
1017+
tb.TaskRunStatus(tb.StatusCondition(apis.Condition{Type: apis.ConditionSucceeded, Status: corev1.ConditionUnknown}))),
1018+
}
1019+
1020+
d := test.Data{
1021+
PipelineRuns: prs,
1022+
Pipelines: ps,
1023+
Tasks: ts,
1024+
TaskRuns: tr,
1025+
}
1026+
1027+
testAssets, cancel := getPipelineRunController(t, d)
1028+
defer cancel()
1029+
c := testAssets.Controller
1030+
clients := testAssets.Clients
1031+
1032+
// Make the patch call fail, i.e. make it so that the controller fails to cancel the TaskRun
1033+
clients.Pipeline.PrependReactor("patch", "taskruns", func(action k8stesting.Action) (bool, runtime.Object, error) {
1034+
fmt.Println(action.GetVerb(), action.GetResource())
1035+
return true, nil, fmt.Errorf("i'm sorry Dave, i'm afraid i can't do that")
1036+
})
1037+
1038+
err := c.Reconciler.Reconcile(context.Background(), "foo/test-pipeline-run-with-timeout")
1039+
if err == nil {
1040+
t.Errorf("Expected to see error returned from reconcile after failing to cancel TaskRun but saw none!")
1041+
}
1042+
1043+
// Check that the PipelineRun is still running with correct error message
1044+
reconciledRun, err := clients.Pipeline.TektonV1alpha1().PipelineRuns("foo").Get("test-pipeline-run-with-timeout", metav1.GetOptions{})
1045+
if err != nil {
1046+
t.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
1047+
}
1048+
1049+
// The PipelineRun should not be cancelled b/c we couldn't cancel the TaskRun
1050+
condition := reconciledRun.Status.GetCondition(apis.ConditionSucceeded)
1051+
if !condition.IsUnknown() {
1052+
t.Errorf("Expected PipelineRun to still be running since the TaskRun could not be cancelled but succeded condition is %v", condition.Status)
1053+
}
1054+
if condition.Reason != ReasonCouldntCancel {
1055+
t.Errorf("Expected PipelineRun condition to indicate the cancellation failed but reason was %s", condition.Reason)
1056+
}
1057+
}
1058+
10031059
func TestReconcileCancelledPipelineRun(t *testing.T) {
10041060
ps := []*v1alpha1.Pipeline{tb.Pipeline("test-pipeline", "foo", tb.PipelineSpec(
10051061
tb.PipelineTask("hello-world-1", "hello-world", tb.Retries(1)),
@@ -1034,7 +1090,7 @@ func TestReconcileCancelledPipelineRun(t *testing.T) {
10341090
}
10351091

10361092
// The PipelineRun should be still cancelled.
1037-
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != "PipelineRunCancelled" {
1093+
if reconciledRun.Status.GetCondition(apis.ConditionSucceeded).Reason != ReasonCancelled {
10381094
t.Errorf("Expected PipelineRun to be cancelled, but condition reason is %s", reconciledRun.Status.GetCondition(apis.ConditionSucceeded))
10391095
}
10401096

0 commit comments

Comments
 (0)