From 2474fab05542aff3206804ac478926d3205e12ea Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 25 Oct 2019 11:21:42 -0700 Subject: [PATCH 1/7] [docker-dhcp-relay] Create a file named critical_processes. For dhcp-relay service, this file contains a single groupname: isc-dhcp-relay. Signed-off-by: Yong Zhao --- dockers/docker-dhcp-relay/critical_processes | 1 + 1 file changed, 1 insertion(+) create mode 100644 dockers/docker-dhcp-relay/critical_processes diff --git a/dockers/docker-dhcp-relay/critical_processes b/dockers/docker-dhcp-relay/critical_processes new file mode 100644 index 0000000000..ddb183963a --- /dev/null +++ b/dockers/docker-dhcp-relay/critical_processes @@ -0,0 +1 @@ +isc-dhcp-relay From 1cfa8e69268081aea2c4682125a384d9a1ac6102 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 25 Oct 2019 11:32:01 -0700 Subject: [PATCH 2/7] [docker-dhcp-relay] Add paths of supervisord listener script and critical processes file into Dockerfile.j2. Signed-off-by: Yong Zhao --- dockers/docker-dhcp-relay/Dockerfile.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dockers/docker-dhcp-relay/Dockerfile.j2 b/dockers/docker-dhcp-relay/Dockerfile.j2 index e365adab17..d3c09f9ba2 100644 --- a/dockers/docker-dhcp-relay/Dockerfile.j2 +++ b/dockers/docker-dhcp-relay/Dockerfile.j2 @@ -26,5 +26,7 @@ RUN apt-get clean -y && \ COPY ["docker_init.sh", "start.sh", "/usr/bin/"] COPY ["docker-dhcp-relay.supervisord.conf.j2", "wait_for_intf.sh.j2", "/usr/share/sonic/templates/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] ENTRYPOINT ["/usr/bin/docker_init.sh"] From 847aed0a3ac71fdf54968dd369ed968acab50861 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 25 Oct 2019 11:36:19 -0700 Subject: [PATCH 3/7] [docker-dhcp-relay] Make event listener autostart by adding option in supervisord conf file. 
Signed-off-by: Yong Zhao --- .../docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 index 4462ff3d7f..e738e8699e 100644 --- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 +++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 From 2ed2adb54ab9a7f8dd3f16eb98fb3521e594615e Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 25 Oct 2019 11:41:03 -0700 Subject: [PATCH 4/7] [docker-dhcp-relay] Configure systemd to stop restarting dhcp-relay if it attempts to restart this container more than 3 times in 20 minutes. 
Signed-off-by: Yong Zhao --- files/build_templates/dhcp_relay.service.j2 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/build_templates/dhcp_relay.service.j2 b/files/build_templates/dhcp_relay.service.j2 index 7ec133c87a..d0a27e4a7f 100644 --- a/files/build_templates/dhcp_relay.service.j2 +++ b/files/build_templates/dhcp_relay.service.j2 @@ -3,12 +3,16 @@ Description=DHCP relay container Requires=updategraph.service swss.service teamd.service After=updategraph.service swss.service teamd.service Before=ntp-config.service +StartLimitIntervalSec=1200 +StartLimitBurst=3 [Service] User={{ sonicadmin_user }} ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start ExecStart=/usr/bin/{{ docker_container_name }}.sh wait ExecStop=/usr/bin/{{ docker_container_name }}.sh stop +Restart=always +RestartSec=30 [Install] WantedBy=multi-user.target swss.service teamd.service From e2ae9d1a9dc943b34e74bc042c04cff8dfe5f961 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 25 Oct 2019 11:43:46 -0700 Subject: [PATCH 5/7] [docker-dhcp-relay] Add macro $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) to shared Makefile docker-dhcp-relay.mk. Signed-off-by: Yong Zhao --- rules/docker-dhcp-relay.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/rules/docker-dhcp-relay.mk b/rules/docker-dhcp-relay.mk index 6fad19cc19..5aae24ee33 100644 --- a/rules/docker-dhcp-relay.mk +++ b/rules/docker-dhcp-relay.mk @@ -25,3 +25,4 @@ SONIC_STRETCH_DBG_DOCKERS += $(DOCKER_DHCP_RELAY_DBG) $(DOCKER_DHCP_RELAY)_CONTAINER_NAME = dhcp_relay $(DOCKER_DHCP_RELAY)_RUN_OPT += --net=host --privileged -t $(DOCKER_DHCP_RELAY)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro +$(DOCKER_DHCP_RELAY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) From b705b357c7465265dd9aa1e6f8704cf7ed82c3d3 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 25 Oct 2019 11:48:02 -0700 Subject: [PATCH 6/7] [docker-dhcp-relay] Event listener will also be guided by a groupname which monitors a bunch of processes. 
Signed-off-by: Yong Zhao --- files/scripts/supervisor-proc-exit-listener | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 6bc62fc400..8d1735cd2b 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -33,9 +33,10 @@ def main(): expected = int(payload_headers['expected']) processname = payload_headers['processname'] + groupname = payload_headers['groupname'] # If a critical process exited unexpectedly, terminate supervisor - if expected == 0 and processname in critical_processes: + if expected == 0 and (processname in critical_processes or groupname in critical_processes): MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." msg = MSG_FORMAT_STR.format(payload_headers['processname']) syslog.syslog(syslog.LOG_INFO, msg) From 7ca5fc8ac8e5389b2f49e97fe84d9cc5be44dadc Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 5 Nov 2019 13:24:33 -0800 Subject: [PATCH 7/7] [docker-dhcp-relay] Add event listener option in test conf file. Signed-off-by: Yong Zhao --- .../tests/sample_output/docker-dhcp-relay.supervisord.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf index d285fbfc78..bae273eeaf 100644 --- a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf +++ b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1