From 0b76f8327ba3e02945e58460c19fff61b4c9d10e Mon Sep 17 00:00:00 2001 From: Ash-exp Date: Wed, 14 May 2025 17:58:09 +0530 Subject: [PATCH 1/2] fix: anomalies in deployment status timeline --- pkg/app/AppService.go | 1 + .../PipelineStatusTimelineResourcesService.go | 10 ++++++++-- .../helper/deploymentStatusHelper.go | 18 +++++++++++------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pkg/app/AppService.go b/pkg/app/AppService.go index 4c473a30b5..6287a970ff 100644 --- a/pkg/app/AppService.go +++ b/pkg/app/AppService.go @@ -610,6 +610,7 @@ func (impl *AppServiceImpl) UpdatePipelineStatusTimelineForApplicationChanges(ap haveNewTimeline = true timeline.Status = timelineStatus.TIMELINE_STATUS_APP_HEALTHY timeline.StatusDetail = "App status is Healthy." + timeline.StatusTime = statusTime } if haveNewTimeline { // not checking if this status is already present or not because already checked for terminal status existence earlier diff --git a/pkg/app/status/PipelineStatusTimelineResourcesService.go b/pkg/app/status/PipelineStatusTimelineResourcesService.go index 630185dee1..c6482106f2 100644 --- a/pkg/app/status/PipelineStatusTimelineResourcesService.go +++ b/pkg/app/status/PipelineStatusTimelineResourcesService.go @@ -92,18 +92,24 @@ func (impl *PipelineStatusTimelineResourcesServiceImpl) SaveOrUpdatePipelineTime if application != nil && application.Status.OperationState != nil && application.Status.OperationState.SyncResult != nil { for _, resource := range application.Status.OperationState.SyncResult.Resources { if resource != nil { + resourceStatus := string(resource.HookPhase) + if len(resourceStatus) == 0 { + resourceStatus = string(resource.Status) + } + //if resource is already present in the timelineResources, then update it if index, ok := oldTimelineResourceMap[resource.Name]; ok { - timelineResources[index].ResourceStatus = string(resource.HookPhase) + timelineResources[index].ResourceStatus = resourceStatus 
timelineResources[index].StatusMessage = resource.Message timelineResources[index].UpdatedBy = userId timelineResources[index].UpdatedOn = time.Now() timelineResourcesToBeUpdated = append(timelineResourcesToBeUpdated, timelineResources[index]) } else { + //if resource is not present in the timelineResources, then create a new one newTimelineResource := &pipelineConfig.PipelineStatusTimelineResources{ ResourceName: resource.Name, ResourceKind: resource.Kind, ResourceGroup: resource.Group, - ResourceStatus: string(resource.HookPhase), + ResourceStatus: resourceStatus, StatusMessage: resource.Message, AuditLog: sql.AuditLog{ CreatedBy: userId, diff --git a/pkg/argoApplication/helper/deploymentStatusHelper.go b/pkg/argoApplication/helper/deploymentStatusHelper.go index 972ffd1937..587ce5383e 100644 --- a/pkg/argoApplication/helper/deploymentStatusHelper.go +++ b/pkg/argoApplication/helper/deploymentStatusHelper.go @@ -10,13 +10,14 @@ import ( func GetSyncStartTime(app *v1alpha1.Application, defaultStartTime time.Time) time.Time { startTime := metav1.NewTime(defaultStartTime) gitHash := app.Status.Sync.Revision - if app.Status.OperationState != nil { + if app.Status.OperationState != nil && + app.Status.OperationState.Operation.Sync != nil && + app.Status.OperationState.Operation.Sync.Revision == gitHash { startTime = app.Status.OperationState.StartedAt - } else if app.Status.History != nil { - for _, history := range app.Status.History { - if history.Revision == gitHash { - startTime = *history.DeployStartedAt - } + } else if len(app.Status.History) != 0 { + if app.Status.History.LastRevisionHistory().Revision == gitHash && + app.Status.History.LastRevisionHistory().DeployStartedAt != nil { + startTime = *app.Status.History.LastRevisionHistory().DeployStartedAt } } return startTime.Time @@ -26,7 +27,10 @@ func GetSyncStartTime(app *v1alpha1.Application, defaultStartTime time.Time) tim func GetSyncFinishTime(app *v1alpha1.Application, defaultEndTime time.Time) time.Time 
{ finishTime := metav1.NewTime(defaultEndTime) gitHash := app.Status.Sync.Revision - if app.Status.OperationState != nil && app.Status.OperationState.FinishedAt != nil { + if app.Status.OperationState != nil && + app.Status.OperationState.Operation.Sync != nil && + app.Status.OperationState.Operation.Sync.Revision == gitHash && + app.Status.OperationState.FinishedAt != nil { finishTime = *app.Status.OperationState.FinishedAt } else if app.Status.History != nil { for _, history := range app.Status.History { From b478c9c74f09d883ff2808d15b7ca51d8be79dbf Mon Sep 17 00:00:00 2001 From: Ash-exp Date: Thu, 15 May 2025 13:26:08 +0530 Subject: [PATCH 2/2] fix: argocd sync start/ finish validation --- pkg/app/AppService.go | 42 ++++++++++++++----- .../helper/deploymentStatusHelper.go | 29 +++++++------ pkg/workflow/status/WorkflowStatusService.go | 3 +- 3 files changed, 47 insertions(+), 27 deletions(-) diff --git a/pkg/app/AppService.go b/pkg/app/AppService.go index 6287a970ff..589f272bb3 100644 --- a/pkg/app/AppService.go +++ b/pkg/app/AppService.go @@ -74,14 +74,14 @@ import ( type AppServiceConfig struct { CdPipelineStatusCronTime string `env:"CD_PIPELINE_STATUS_CRON_TIME" envDefault:"*/2 * * * *" description:"Cron time for CD pipeline status"` CdHelmPipelineStatusCronTime string `env:"CD_HELM_PIPELINE_STATUS_CRON_TIME" envDefault:"*/2 * * * *" description:"Cron time to check the pipeline status "` - CdPipelineStatusTimeoutDuration string `env:"CD_PIPELINE_STATUS_TIMEOUT_DURATION" envDefault:"20" description:"Timeout for CD pipeline to get healthy" ` // in minutes - PipelineDegradedTime string `env:"PIPELINE_DEGRADED_TIME" envDefault:"10" description:"Time to mark a pipeline degraded if not healthy in defined time"` // in minutes - GetPipelineDeployedWithinHours int `env:"DEPLOY_STATUS_CRON_GET_PIPELINE_DEPLOYED_WITHIN_HOURS" envDefault:"12" description:"This flag is used to fetch the deployment status of the application. 
It retrieves the status of deployments that occurred between 12 hours and 10 minutes prior to the current time. It fetches non-terminal statuses."` // in hours - HelmPipelineStatusCheckEligibleTime string `env:"HELM_PIPELINE_STATUS_CHECK_ELIGIBLE_TIME" envDefault:"120" description:"eligible time for checking helm app status periodically and update in db, value is in seconds., default is 120, if wfr is updated within configured time i.e. HELM_PIPELINE_STATUS_CHECK_ELIGIBLE_TIME then do not include for this cron cycle."` // in seconds + CdPipelineStatusTimeoutDuration string `env:"CD_PIPELINE_STATUS_TIMEOUT_DURATION" envDefault:"20" description:"Timeout for CD pipeline to get healthy" ` // in minutes + PipelineDegradedTime string `env:"PIPELINE_DEGRADED_TIME" envDefault:"10" description:"Time to mark a pipeline degraded if not healthy in defined time"` // in minutes + GetPipelineDeployedWithinHours int `env:"DEPLOY_STATUS_CRON_GET_PIPELINE_DEPLOYED_WITHIN_HOURS" envDefault:"12" description:"This flag is used to fetch the deployment status of the application. It retrieves the status of deployments that occurred between 12 hours and 10 minutes prior to the current time. It fetches non-terminal statuses."` // in hours + HelmPipelineStatusCheckEligibleTime string `env:"HELM_PIPELINE_STATUS_CHECK_ELIGIBLE_TIME" envDefault:"120" description:"eligible time for checking helm app status periodically and update in db, value is in seconds., default is 120, if wfr is updated within configured time i.e. 
HELM_PIPELINE_STATUS_CHECK_ELIGIBLE_TIME then do not include for this cron cycle."` // in seconds ExposeCDMetrics bool `env:"EXPOSE_CD_METRICS" envDefault:"false"` - DevtronChartHelmInstallRequestTimeout int `env:"DEVTRON_CHART_INSTALL_REQUEST_TIMEOUT" envDefault:"6" description:"Context timeout for no gitops concurrent async deployments"` // in minutes - DevtronChartArgoCdInstallRequestTimeout int `env:"DEVTRON_CHART_ARGO_CD_INSTALL_REQUEST_TIMEOUT" envDefault:"1" description:"Context timeout for gitops concurrent async deployments"` // in minutes - ArgoCdManualSyncCronPipelineDeployedBefore int `env:"ARGO_APP_MANUAL_SYNC_TIME" envDefault:"3" description:"retry argocd app manual sync if the timeline is stuck in ARGOCD_SYNC_INITIATED state for more than this defined time (in mins)"` // in minutes + DevtronChartHelmInstallRequestTimeout int `env:"DEVTRON_CHART_INSTALL_REQUEST_TIMEOUT" envDefault:"6" description:"Context timeout for no gitops concurrent async deployments"` // in minutes + DevtronChartArgoCdInstallRequestTimeout int `env:"DEVTRON_CHART_ARGO_CD_INSTALL_REQUEST_TIMEOUT" envDefault:"1" description:"Context timeout for gitops concurrent async deployments"` // in minutes + ArgoCdManualSyncCronPipelineDeployedBefore int `env:"ARGO_APP_MANUAL_SYNC_TIME" envDefault:"3" description:"retry argocd app manual sync if the timeline is stuck in ARGOCD_SYNC_INITIATED state for more than this defined time (in mins)"` // in minutes } func GetAppServiceConfig() (*AppServiceConfig, error) { @@ -555,10 +555,15 @@ func (impl *AppServiceImpl) UpdatePipelineStatusTimelineForApplicationChanges(ap if err != nil { impl.logger.Errorw("error in save/update pipeline status fetch detail", "err", err, "cdWfrId", runnerHistoryId) } + syncStartTime, found := helper.GetSyncStartTime(app) + if !found { + impl.logger.Warnw("sync operation not started yet", "app", app) + return isTimelineUpdated, isTimelineTimedOut, kubectlApplySyncedTimeline, fmt.Errorf("sync operation not started yet") 
+ } // creating cd pipeline status timeline timeline := &pipelineConfig.PipelineStatusTimeline{ CdWorkflowRunnerId: runnerHistoryId, - StatusTime: helper.GetSyncStartTime(app, statusTime), + StatusTime: syncStartTime, AuditLog: sql.AuditLog{ CreatedBy: 1, CreatedOn: time.Now(), @@ -591,7 +596,12 @@ func (impl *AppServiceImpl) UpdatePipelineStatusTimelineForApplicationChanges(ap timeline.Id = 0 timeline.Status = timelineStatus.TIMELINE_STATUS_KUBECTL_APPLY_SYNCED timeline.StatusDetail = app.Status.OperationState.Message - timeline.StatusTime = helper.GetSyncFinishTime(app, statusTime) + syncFinishTime, found := helper.GetSyncFinishTime(app) + if !found { + impl.logger.Warnw("sync operation not found for the deployment", "app", app) + return isTimelineUpdated, isTimelineTimedOut, kubectlApplySyncedTimeline, fmt.Errorf("sync operation not found for the deployment") + } + timeline.StatusTime = syncFinishTime // checking and saving if this timeline is present or not because kubewatch may stream same objects multiple times err = impl.pipelineStatusTimelineService.SaveTimeline(timeline, nil) if err != nil { @@ -669,10 +679,15 @@ func (impl *AppServiceImpl) UpdatePipelineStatusTimelineForApplicationChanges(ap if err != nil { impl.logger.Errorw("error in save/update pipeline status fetch detail", "err", err, "installedAppVersionHistoryId", runnerHistoryId) } + syncStartTime, found := helper.GetSyncStartTime(app) + if !found { + impl.logger.Warnw("sync operation not started yet", "app", app) + return isTimelineUpdated, isTimelineTimedOut, kubectlApplySyncedTimeline, fmt.Errorf("sync operation not started yet") + } // creating installedAppVersionHistory status timeline timeline := &pipelineConfig.PipelineStatusTimeline{ InstalledAppVersionHistoryId: runnerHistoryId, - StatusTime: helper.GetSyncStartTime(app, statusTime), + StatusTime: syncStartTime, AuditLog: sql.AuditLog{ CreatedBy: 1, CreatedOn: time.Now(), @@ -705,7 +720,12 @@ func (impl *AppServiceImpl) 
UpdatePipelineStatusTimelineForApplicationChanges(ap timeline.Id = 0 timeline.Status = timelineStatus.TIMELINE_STATUS_KUBECTL_APPLY_SYNCED timeline.StatusDetail = app.Status.OperationState.Message - timeline.StatusTime = helper.GetSyncFinishTime(app, statusTime) + syncFinishTime, found := helper.GetSyncFinishTime(app) + if !found { + impl.logger.Warnw("sync operation not found for the deployment", "app", app) + return isTimelineUpdated, isTimelineTimedOut, kubectlApplySyncedTimeline, fmt.Errorf("sync operation not found for the deployment") + } + timeline.StatusTime = syncFinishTime // checking and saving if this timeline is present or not because kubewatch may stream same objects multiple times err = impl.pipelineStatusTimelineService.SaveTimeline(timeline, nil) if err != nil { diff --git a/pkg/argoApplication/helper/deploymentStatusHelper.go b/pkg/argoApplication/helper/deploymentStatusHelper.go index 587ce5383e..ec9f7fc6ca 100644 --- a/pkg/argoApplication/helper/deploymentStatusHelper.go +++ b/pkg/argoApplication/helper/deploymentStatusHelper.go @@ -2,42 +2,41 @@ package helper import ( "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "time" ) // GetSyncStartTime assumes that it is always called for calculating start time of latest git hash -func GetSyncStartTime(app *v1alpha1.Application, defaultStartTime time.Time) time.Time { - startTime := metav1.NewTime(defaultStartTime) +func GetSyncStartTime(app *v1alpha1.Application) (time.Time, bool) { gitHash := app.Status.Sync.Revision if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil && app.Status.OperationState.Operation.Sync.Revision == gitHash { - startTime = app.Status.OperationState.StartedAt + return app.Status.OperationState.StartedAt.Time, true } else if len(app.Status.History) != 0 { if app.Status.History.LastRevisionHistory().Revision == gitHash && app.Status.History.LastRevisionHistory().DeployStartedAt != 
nil { - startTime = *app.Status.History.LastRevisionHistory().DeployStartedAt + startTime := *app.Status.History.LastRevisionHistory().DeployStartedAt + return startTime.Time, true } } - return startTime.Time + return time.Time{}, false } // GetSyncFinishTime assumes that it is always called for calculating finish time of latest git hash -func GetSyncFinishTime(app *v1alpha1.Application, defaultEndTime time.Time) time.Time { - finishTime := metav1.NewTime(defaultEndTime) +func GetSyncFinishTime(app *v1alpha1.Application) (time.Time, bool) { gitHash := app.Status.Sync.Revision if app.Status.OperationState != nil && app.Status.OperationState.Operation.Sync != nil && app.Status.OperationState.Operation.Sync.Revision == gitHash && app.Status.OperationState.FinishedAt != nil { - finishTime = *app.Status.OperationState.FinishedAt - } else if app.Status.History != nil { - for _, history := range app.Status.History { - if history.Revision == gitHash { - finishTime = history.DeployedAt - } + finishTime := *app.Status.OperationState.FinishedAt + return finishTime.Time, true + } else if len(app.Status.History) != 0 { + if app.Status.History.LastRevisionHistory().Revision == gitHash && + app.Status.History.LastRevisionHistory().DeployStartedAt != nil { + finishTime := *app.Status.History.LastRevisionHistory().DeployStartedAt + return finishTime.Time, true } } - return finishTime.Time + return time.Time{}, false } diff --git a/pkg/workflow/status/WorkflowStatusService.go b/pkg/workflow/status/WorkflowStatusService.go index 5c109fed8f..78d962f884 100644 --- a/pkg/workflow/status/WorkflowStatusService.go +++ b/pkg/workflow/status/WorkflowStatusService.go @@ -455,7 +455,8 @@ func (impl *WorkflowStatusServiceImpl) CheckAndSendArgoPipelineStatusSyncEventIf } // pipelineId can be cdPipelineId or installedAppVersionId, using isAppStoreApplication flag to identify between them - if lastSyncTime.IsZero() || (!lastSyncTime.IsZero() && time.Since(lastSyncTime) > 5*time.Second) { // create 
new nats event + if lastSyncTime.IsZero() || (!lastSyncTime.IsZero() && time.Since(lastSyncTime) > 5*time.Second) { + // create new nats event err = impl.cdPipelineEventPublishService.PublishArgoTypePipelineSyncEvent(pipelineId, installedAppVersionId, userId, isAppStoreApplication) if err != nil { impl.logger.Errorw("error, PublishArgoTypePipelineSyncEvent", "err", err)