@@ -16,6 +16,7 @@ package providers
1616
1717import (
1818 "context"
19+ "errors"
1920 "fmt"
2021 "os"
2122 "sync"
@@ -183,7 +184,18 @@ func (c *Controller) Run(ctx context.Context) error {
183184 return err
184185 }
185186
186- c .run (rootCtx )
187+ err := c .run (rootCtx )
188+ if err != nil {
189+ log .Errorf ("provider run returned error, exiting process: %s" , err )
190+
191+ // attempt to give up leader status, should also release the waitgroup and exit the process
192+ rootCancel ()
193+ go func () {
194+ time .Sleep (time .Second * 5 )
195+ log .Errorf ("process has not quit 5s after provider failure, forcing exit: %s" , err )
196+ os .Exit (1 )
197+ }()
198+ }
187199
188200 wg .Wait ()
189201 return nil
@@ -382,7 +394,7 @@ func (c *Controller) initSharedInformers() *providertypes.ListerInformer {
382394 return listerInformer
383395}
384396
385- func (c * Controller ) run (ctx context.Context ) {
397+ func (c * Controller ) run (ctx context.Context ) error {
386398 log .Infow ("controller tries to leading ..." ,
387399 zap .String ("namespace" , c .namespace ),
388400 zap .String ("pod" , c .name ),
@@ -406,29 +418,22 @@ func (c *Controller) run(ctx context.Context) {
406418 CacheSynced : ! c .cfg .EtcdServer .Enabled ,
407419 SSLKeyEncryptSalt : c .cfg .EtcdServer .SSLKeyEncryptSalt ,
408420 }
421+
422+ // TODO: needs retry logic
409423 err := c .apisix .AddCluster (ctx , clusterOpts )
410424 if err != nil && err != apisix .ErrDuplicatedCluster {
411- // TODO give up the leader role
412425 log .Errorf ("failed to add default cluster: %s" , err )
413- return
426+ return err
414427 }
415428
416429 if err := c .apisix .Cluster (c .cfg .APISIX .DefaultClusterName ).HasSynced (ctx ); err != nil {
417- // TODO give up the leader role
418430 log .Errorf ("failed to wait the default cluster to be ready: %s" , err )
419-
420- // re-create apisix cluster, used in next c.run
421- if err = c .apisix .UpdateCluster (ctx , clusterOpts ); err != nil {
422- log .Errorf ("failed to update default cluster: %s" , err )
423- return
424- }
425- return
431+ return err
426432 }
427433
428434 // Creation Phase
429435
430436 log .Info ("creating controller" )
431-
432437 c .informers = c .initSharedInformers ()
433438 common := & providertypes.Common {
434439 ControllerNamespace : c .namespace ,
@@ -443,14 +448,12 @@ func (c *Controller) run(ctx context.Context) {
443448
444449 c .namespaceProvider , err = namespace .NewWatchingNamespaceProvider (ctx , c .kubeClient , c .cfg , c .resourceSyncCh )
445450 if err != nil {
446- ctx .Done ()
447- return
451+ return err
448452 }
449453
450454 c .podProvider , err = pod .NewProvider (common , c .namespaceProvider )
451455 if err != nil {
452- ctx .Done ()
453- return
456+ return err
454457 }
455458
456459 c .translator = translation .NewTranslator (& translation.TranslatorOptions {
@@ -466,20 +469,17 @@ func (c *Controller) run(ctx context.Context) {
466469
467470 c .apisixProvider , c .apisixTranslator , err = apisixprovider .NewProvider (common , c .namespaceProvider , c .translator )
468471 if err != nil {
469- ctx .Done ()
470- return
472+ return err
471473 }
472474
473475 c .ingressProvider , err = ingressprovider .NewProvider (common , c .namespaceProvider , c .translator , c .apisixTranslator )
474476 if err != nil {
475- ctx .Done ()
476- return
477+ return err
477478 }
478479
479480 c .kubeProvider , err = k8s .NewProvider (common , c .translator , c .namespaceProvider , c .apisixProvider , c .ingressProvider )
480481 if err != nil {
481- ctx .Done ()
482- return
482+ return err
483483 }
484484
485485 if c .cfg .Kubernetes .EnableGatewayAPI {
@@ -495,8 +495,7 @@ func (c *Controller) run(ctx context.Context) {
495495 ListerInformer : common .ListerInformer ,
496496 })
497497 if err != nil {
498- ctx .Done ()
499- return
498+ return err
500499 }
501500 }
502501
@@ -505,25 +504,22 @@ func (c *Controller) run(ctx context.Context) {
505504 log .Info ("init namespaces" )
506505
507506 if err = c .namespaceProvider .Init (ctx ); err != nil {
508- ctx .Done ()
509- return
507+ return err
510508 }
511509
512510 log .Info ("wait for resource sync" )
513511
514512 // Wait for resource sync
515513 if ok := c .informers .StartAndWaitForCacheSync (ctx ); ! ok {
516- ctx .Done ()
517- return
514+ return errors .New ("StartAndWaitForCacheSync failed" )
518515 }
519516
520517 log .Info ("init providers" )
521518
522519 // Compare resource
523520 if ! c .cfg .EtcdServer .Enabled {
524521 if err = c .apisixProvider .Init (ctx ); err != nil {
525- ctx .Done ()
526- return
522+ return err
527523 }
528524 }
529525
@@ -573,6 +569,8 @@ func (c *Controller) run(ctx context.Context) {
573569 log .Error ("Start failed, abort..." )
574570 cancelFunc ()
575571 }
572+
573+ return nil
576574}
577575
578576func (c * Controller ) checkClusterHealth (ctx context.Context , cancelFunc context.CancelFunc ) {
0 commit comments