Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dockers/docker-database/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"]
COPY ["docker-database-init.sh", "/usr/local/bin/"]
COPY ["ping_pong_db_insts", "/usr/local/bin/"]
COPY ["database_config.json", "/etc/default/sonic-db/"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["critical_processes", "/etc/supervisor"]

ENTRYPOINT ["/usr/local/bin/docker-database-init.sh"]
1 change: 1 addition & 0 deletions dockers/docker-database/critical_processes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
redis
7 changes: 7 additions & 0 deletions dockers/docker-database/supervisord.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

; Supervisor event listener for this container. It runs the
; supervisor-proc-exit-listener script, passing "database" as the container
; name, and subscribes it to PROCESS_STATE_EXITED events only.
[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name database
events=PROCESS_STATE_EXITED
; Launch the listener automatically when supervisord starts.
autostart=true
; Restart the listener only if it exits with an exit code that is not one of
; its configured expected exit codes.
autorestart=unexpected


[program:rsyslogd]
command=/bin/bash -c "rm -f /var/run/rsyslogd.pid && /usr/sbin/rsyslogd -n"
priority=1
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/dhcp_relay.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=DHCP relay container
Requires=updategraph.service
Requires=updategraph.service database.service
After=updategraph.service swss.service syncd.service teamd.service
Before=ntp-config.service
StartLimitIntervalSec=1200
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/lldp.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=LLDP container
Requires=updategraph.service
Requires=updategraph.service database.service
After=updategraph.service swss.service syncd.service
Before=ntp-config.service
StartLimitIntervalSec=1200
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/radv.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=Router advertiser container
Requires=updategraph.service
Requires=updategraph.service database.service
After=updategraph.service swss.service syncd.service
Before=ntp-config.service
StartLimitIntervalSec=1200
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/sflow.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=sFlow container
Requisite=swss.service
Requisite=swss.service database.service
After=swss.service syncd.service
Before=ntp-config.service
StartLimitIntervalSec=1200
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/single_instance/bgp.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=BGP container
Requires=updategraph.service
Requires=updategraph.service database.service
After=updategraph.service
Before=ntp-config.service

Expand Down
4 changes: 4 additions & 0 deletions files/build_templates/single_instance/database.service.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ Description=Database container
Requires=docker.service
After=docker.service
After=rc-local.service
StartLimitIntervalSec=1200
StartLimitBurst=3

# How the container is started, supervised and stopped. All three commands go
# through the per-container helper script, whose name is filled in from the
# Jinja2 variable docker_container_name when the template is rendered.
[Service]
User=root
# Runs before the main process: helper script with the "start" action.
ExecStartPre=/usr/bin/{{docker_container_name}}.sh start
# Main process of the unit: helper script with the "wait" action.
# NOTE(review): presumably blocks until the container exits - confirm in the script.
ExecStart=/usr/bin/{{docker_container_name}}.sh wait
ExecStop=/usr/bin/{{docker_container_name}}.sh stop
# Restart the service whenever it stops, regardless of exit status, waiting
# 30 seconds before each restart attempt.
Restart=always
RestartSec=30

# When the unit is enabled, it is started as part of multi-user.target.
[Install]
WantedBy=multi-user.target
2 changes: 1 addition & 1 deletion files/build_templates/single_instance/teamd.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=TEAMD container
Requires=updategraph.service
Requires=updategraph.service database.service
After=updategraph.service swss.service
Before=ntp-config.service
StartLimitIntervalSec=1200
Expand Down
2 changes: 1 addition & 1 deletion files/build_templates/snmp.service.j2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[Unit]
Description=SNMP container
Requires=updategraph.service
Requires=updategraph.service database.service
Requisite=swss.service
After=updategraph.service swss.service syncd.service
Before=ntp-config.service
Expand Down
33 changes: 17 additions & 16 deletions files/scripts/supervisor-proc-exit-listener
Original file line number Diff line number Diff line change
Expand Up @@ -52,24 +52,25 @@ def main(argv):
processname = payload_headers['processname']
groupname = payload_headers['groupname']

config_db = swsssdk.ConfigDBConnector()
config_db.connect()
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
if not container_features_table:
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...")
sys.exit(2)

if not container_features_table.has_key(container_name):
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name))
sys.exit(3)

restart_feature = container_features_table[container_name].get('auto_restart')
if not restart_feature:
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
sys.exit(4)
if container_name != 'database':
config_db = swsssdk.ConfigDBConnector()
config_db.connect()
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
if not container_features_table:
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...")
sys.exit(2)

if not container_features_table.has_key(container_name):
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name))
sys.exit(3)

restart_feature = container_features_table[container_name].get('auto_restart')
if not restart_feature:
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
sys.exit(4)

# If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor
if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes):
if (container_name == 'database' or restart_feature == 'enabled') and expected == 0 and (processname in critical_processes or groupname in critical_processes):
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
syslog.syslog(syslog.LOG_INFO, msg)
Expand Down
1 change: 1 addition & 0 deletions rules/docker-database.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ $(DOCKER_DATABASE)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro

$(DOCKER_DATABASE)_BASE_IMAGE_FILES += redis-cli:/usr/bin/redis-cli
$(DOCKER_DATABASE)_BASE_IMAGE_FILES += monit_database:/etc/monit/conf.d
$(DOCKER_DATABASE)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)