Skip to content

Commit 410252f

Browse files
committed
Emit events for all TaskRun lifecycle events
Start emitting events for additional TaskRun lifecycle events: - taskrun started - taskrun timeout Introduce pre-run and post-run functions that are invoked asynchronously when the taskrun starts and completes, to emit events. These same functions shall be used to trigger any other async behaviour on start/stop of taskruns. Add documentation on events. Fixes #2328 Work towards #2082
1 parent 713d2fd commit 410252f

File tree

7 files changed

+96
-15
lines changed

7 files changed

+96
-15
lines changed

docs/events.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<!--
2+
---
3+
linkTitle: "Events"
4+
weight: 2
5+
---
6+
-->
7+
# Events
8+
9+
Tekton runtime resources, specifically `TaskRuns` and `PipelineRuns`,
10+
emit events when they are executed, so that users can monitor their lifecycle
11+
and react to it. Tekton emits [kubernetes events](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#event-v1-core), which can be retrieved from the resource via
12+
`kubectl describe [resource]`.
13+
14+
No events are emitted for `Conditions` today.
15+
16+
## TaskRuns
17+
18+
`TaskRun` events are generated for the following `Reasons`:
19+
- `Started`: this is triggered the first time the `TaskRun` is picked by the
20+
reconciler from its work queue, so it only happens if web-hook validation was
21+
successful. Note that this event does not imply that a step started executing,
22+
as several conditions must be met first:
23+
- task and bound resource validation must be successful
24+
- attached conditions must run successfully
25+
- the `Pod` associated to the `TaskRun` must be successfully scheduled
26+
- `Succeeded`: this is triggered once all steps in the `TaskRun` are executed
27+
successfully, including post-steps injected by Tekton.
28+
- `Failed`: this is triggered if the `TaskRun` is completed, but not successfully.
29+
Causes of failure may be: one of the steps failed, the `TaskRun` was cancelled or
30+
the `TaskRun` timed out.
31+
32+
## PipelineRuns
33+
34+
`PipelineRun` events are generated for the following `Reasons`:
35+
- `Succeeded`: this is triggered once all `Tasks` reachable via the DAG are
36+
executed successfully.
37+
- `Failed`: this is triggered if the `PipelineRun` is completed, but not
38+
successfully. Causes of failure may be: one of the `Tasks` failed or the
39+
`PipelineRun` was cancelled.

docs/pipelineruns.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Creation of a `PipelineRun` will trigger the creation of
2929
- [Workspaces](#workspaces)
3030
- [Cancelling a PipelineRun](#cancelling-a-pipelinerun)
3131
- [LimitRanges](#limitranges)
32+
- [Events](events.md#pipelineruns)
3233

3334
## Syntax
3435

docs/taskruns.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@ A `TaskRun` runs until all `steps` have completed or until a failure occurs.
3030
- [Steps](#steps)
3131
- [Results](#results)
3232
- [Cancelling a TaskRun](#cancelling-a-taskrun)
33+
- [Sidecars](#sidecars)
34+
- [LimitRanges](#limitranges)
35+
- [Events](events.md#taskruns)
3336
- [Examples](#examples)
3437
- [Example TaskRun](#example-taskrun)
3538
- [Example with embedded specs](#example-with-embedded-specs)
3639
- [Example Task Reuse](#example-task-reuse)
3740
- [Using a `ServiceAccount`](#using-a-serviceaccount)
38-
- [Sidecars](#sidecars)
39-
- [LimitRanges](#limitranges)
40-
4141
---
4242

4343
## Syntax

pkg/reconciler/event.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ func EmitEvent(c record.EventRecorder, beforeCondition *apis.Condition, afterCon
3131
c.Event(object, corev1.EventTypeNormal, "Succeeded", afterCondition.Message)
3232
} else if afterCondition.Status == corev1.ConditionFalse {
3333
c.Event(object, corev1.EventTypeWarning, "Failed", afterCondition.Message)
34+
} else {
35+
if beforeCondition == nil {
36+
c.Event(object, corev1.EventTypeNormal, "Started", "")
37+
}
3438
}
3539
}
3640
}

pkg/reconciler/event_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ func TestEmitEvent(t *testing.T) {
8080
Status: corev1.ConditionTrue,
8181
},
8282
expectEvent: true,
83+
}, {
84+
name: "nil to unknown",
85+
before: nil,
86+
after: &apis.Condition{
87+
Type: apis.ConditionSucceeded,
88+
Status: corev1.ConditionUnknown,
89+
},
90+
expectEvent: true,
8391
}}
8492

8593
for _, ts := range testcases {

pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ func cloudEventDeliveryFromTargets(targets []string) []v1alpha1.CloudEventDelive
6666
}
6767

6868
// SendCloudEvents is used by the TaskRun controller to send cloud events once
69-
// the TaskRun is complete. `tr` is used to obtain the list of targets but also
70-
// to construct the body of the
69+
// the TaskRun is complete. `tr` is used to obtain the list of targets
7170
func SendCloudEvents(tr *v1alpha1.TaskRun, ceclient CEClient, logger *zap.SugaredLogger) error {
7271
logger = logger.With(zap.String("taskrun", tr.Name))
7372

pkg/reconciler/taskrun/taskrun.go

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,15 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
104104

105105
// If the TaskRun is just starting, this will also set the starttime,
106106
// from which the timeout will immediately begin counting down.
107-
tr.Status.InitializeConditions()
108-
// In case node time was not synchronized, when controller has been scheduled to other nodes.
109-
if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 {
110-
c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime)
111-
tr.Status.StartTime = &tr.CreationTimestamp
107+
if !tr.HasStarted() {
108+
tr.Status.InitializeConditions()
109+
// In case node time was not synchronized, when controller has been scheduled to other nodes.
110+
if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 {
111+
c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime)
112+
tr.Status.StartTime = &tr.CreationTimestamp
113+
}
114+
// Run async startup hooks
115+
go c.preRunAsyncHook(ctx, tr)
112116
}
113117

114118
// If the TaskRun is complete, run some post run fixtures when applicable
@@ -163,8 +167,7 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
163167
if tr.IsCancelled() {
164168
before := tr.Status.GetCondition(apis.ConditionSucceeded)
165169
err := cancelTaskRun(tr, c.KubeClientSet, c.Logger)
166-
after := tr.Status.GetCondition(apis.ConditionSucceeded)
167-
reconciler.EmitEvent(c.Recorder, before, after, tr)
170+
go c.postRunAsyncHook(ctx, tr, before)
168171
return err
169172
}
170173

@@ -173,8 +176,7 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error {
173176
if tr.HasTimedOut() {
174177
before := tr.Status.GetCondition(apis.ConditionSucceeded)
175178
err := timeoutTaskRun(tr, c.KubeClientSet, c.Logger)
176-
after := tr.Status.GetCondition(apis.ConditionSucceeded)
177-
reconciler.EmitEvent(c.Recorder, before, after, tr)
179+
go c.postRunAsyncHook(ctx, tr, before)
178180
return err
179181
}
180182

@@ -248,6 +250,26 @@ func (c *Reconciler) getTaskFunc(tr *v1alpha1.TaskRun) (resources.GetTask, v1alp
248250
return gtFunc, kind
249251
}
250252

253+
// Run any async logic that may be required at start-up time. This method is used
254+
// to emit events, notifications or any other async operation
255+
func (c *Reconciler) preRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun) {
256+
c.Logger.Infof("preRunAsyncHook: %s", tr.Name)
257+
258+
// Emit event
259+
afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded)
260+
reconciler.EmitEvent(c.Recorder, nil, afterCondition, tr)
261+
}
262+
263+
// Run any async logic that may be required once the tr is successfully reconciled
264+
// This method is used to emit events, notifications or any other async operation
265+
func (c *Reconciler) postRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun, beforeCondition *apis.Condition) {
266+
c.Logger.Infof("postRunAsyncHook: %s", tr.Name)
267+
268+
// Emit event
269+
afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded)
270+
reconciler.EmitEvent(c.Recorder, beforeCondition, afterCondition, tr)
271+
}
272+
251273
func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error {
252274
// We may be reading a version of the object that was stored at an older version
253275
// and may not have had all of the assumed default specified.
@@ -412,7 +434,14 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
412434

413435
after := tr.Status.GetCondition(apis.ConditionSucceeded)
414436

415-
reconciler.EmitEvent(c.Recorder, before, after, tr)
437+
// If after is different from before and status is not Unknown, the taskrun
438+
// has completed its work - except for post-run tasks like emitting events,
439+
// recording metrics, sending cloud events.
440+
// Once tr.isDone becomes true, even when this key is queued, `reconcile`
441+
// won't be invoked so we won't pass through here again
442+
if tr.IsDone() && after != before {
443+
go c.postRunAsyncHook(ctx, tr, before)
444+
}
416445
c.Logger.Infof("Successfully reconciled taskrun %s/%s with status: %#v", tr.Name, tr.Namespace, after)
417446

418447
return nil
@@ -585,6 +614,7 @@ func (c *Reconciler) createPod(tr *v1alpha1.TaskRun, rtr *resources.ResolvedTask
585614
type DeletePod func(podName string, options *metav1.DeleteOptions) error
586615

587616
func (c *Reconciler) updateTaskRunStatusForTimeout(tr *v1alpha1.TaskRun, dp DeletePod) error {
617+
588618
c.Logger.Infof("TaskRun %q has timed out, deleting pod", tr.Name)
589619
// tr.Status.PodName will be empty if the pod was never successfully created. This condition
590620
// can be reached, for example, by the pod never being schedulable due to limits imposed by

0 commit comments

Comments
 (0)