Skip to content

Commit 64a6989

Browse files
authored
[Services] Restart NAT service upon unexpected critical process exit. (#4208)
1 parent 09c0563 commit 64a6989

5 files changed

Lines changed: 17 additions & 2 deletions

File tree

dockers/docker-nat/Dockerfile.j2

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ RUN apt-get update \
3838
COPY ["start.sh", "/usr/bin/"]
3939
COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
4040
COPY ["restore_nat_entries.py", "/usr/bin/"]
41+
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
42+
COPY ["critical_processes", "/etc/supervisor"]
4143

4244
RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y
4345
RUN rm -rf /debs

dockers/docker-nat/critical_processes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
natmgrd
2+
natsyncd

dockers/docker-nat/supervisord.conf

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
33
logfile_backups=2
44
nodaemon=true
55

6+
[eventlistener:supervisor-proc-exit-listener]
7+
command=/usr/bin/supervisor-proc-exit-listener --container-name nat
8+
events=PROCESS_STATE_EXITED
9+
autostart=true
10+
autorestart=unexpected
11+
612
[program:start.sh]
713
command=/usr/bin/start.sh
814
priority=1
@@ -15,7 +21,7 @@ stderr_logfile=syslog
1521
command=/usr/sbin/rsyslogd -n
1622
priority=2
1723
autostart=false
18-
autorestart=false
24+
autorestart=unexpected
1925
stdout_logfile=syslog
2026
stderr_logfile=syslog
2127

files/build_templates/nat.service.j2

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@ Description=NAT container
33
Requires=updategraph.service swss.service
44
After=updategraph.service swss.service syncd.service
55
Before=ntp-config.service
6+
StartLimitIntervalSec=1200
7+
StartLimitBurst=3
68

79
[Service]
810
User={{ sonicadmin_user }}
911
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
1012
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
1113
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
14+
Restart=always
15+
RestartSec=30
1216

1317
[Install]
1418
WantedBy=multi-user.target swss.service

rules/docker-nat.mk

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@ $(DOCKER_NAT)_RUN_OPT += --privileged -t
3030
$(DOCKER_NAT)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
3131
$(DOCKER_NAT)_RUN_OPT += -v /host/warmboot:/var/warmboot
3232

33-
$(DOCKER_NAT)_BASE_IMAGE_FILES += natctl:/usr/bin/natctl
33+
$(DOCKER_NAT)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)
3434

35+
$(DOCKER_NAT)_BASE_IMAGE_FILES += natctl:/usr/bin/natctl

0 commit comments

Comments
 (0)