diff --git a/dockers/docker-dhcp-relay/Dockerfile.j2 b/dockers/docker-dhcp-relay/Dockerfile.j2 index d3c09f9ba26..7afa64efab8 100644 --- a/dockers/docker-dhcp-relay/Dockerfile.j2 +++ b/dockers/docker-dhcp-relay/Dockerfile.j2 @@ -26,7 +26,7 @@ RUN apt-get clean -y && \ COPY ["docker_init.sh", "start.sh", "/usr/bin/"] COPY ["docker-dhcp-relay.supervisord.conf.j2", "wait_for_intf.sh.j2", "/usr/share/sonic/templates/"] -COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["supervisor-proc-exit-listener", "/usr/bin"] COPY ["critical_processes", "/etc/supervisor"] ENTRYPOINT ["/usr/bin/docker_init.sh"] diff --git a/dockers/docker-dhcp-relay/supervisor-proc-exit-listener b/dockers/docker-dhcp-relay/supervisor-proc-exit-listener new file mode 100755 index 00000000000..e10dd160de8 --- /dev/null +++ b/dockers/docker-dhcp-relay/supervisor-proc-exit-listener @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +import os +import signal +import sys +import syslog +import swsssdk + +from supervisor import childutils + +CONTAINER_NAME = 'dhcp_relay' + +# Contents of file should be the names of critical processes (as defined in +# supervisor.conf file), one per line +CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' + +# This table in the database contains the features for each container, and each +# feature in a row is configured with a state or a number.
# ConfigDB table that main() queries for this container's 'auto_restart' field.
CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE'


def _load_critical_processes(path):
    """Return the critical process/group names listed in *path*, one per line.

    Surrounding whitespace (including a stray '\\r') is stripped and blank
    lines are skipped, so they can never end up as unmatched entries.
    """
    with open(path, 'r') as f:
        return [name for name in (line.strip() for line in f) if name]


def _get_auto_restart_state():
    """Return the 'auto_restart' value configured for CONTAINER_NAME.

    Returns None when the table is empty or has no row for the container,
    so callers always get a defined value to compare against.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    features = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if features and CONTAINER_NAME in features:
        return features[CONTAINER_NAME].get('auto_restart')
    return None


def main():
    """Run the supervisor event-listener loop for this container.

    Reads event notifications from stdin. When a PROCESS_STATE_EXITED event
    reports an unexpected exit (expected == 0) of a process or group listed
    in CRITICAL_PROCESSES_FILE, and the container's 'auto_restart' setting in
    ConfigDB is 'enabled', logs a message to syslog and sends SIGTERM to the
    parent process. Returns when stdin reaches EOF.
    """
    critical_processes = _load_critical_processes(CRITICAL_PROCESSES_FILE)

    while True:
        # Transition from ACKNOWLEDGED to READY
        childutils.listener.ready()

        line = sys.stdin.readline()
        if not line:
            # EOF: no more events will arrive. Stop cleanly instead of
            # failing with a KeyError on the missing 'len' header.
            return
        headers = childutils.get_headers(line)
        payload = sys.stdin.read(int(headers['len']))

        # Transition from READY to ACKNOWLEDGED
        childutils.listener.ok()

        # We only care about PROCESS_STATE_EXITED events
        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            continue

        payload_headers, _ = childutils.eventdata(payload + '\n')
        expected = int(payload_headers['expected'])
        processname = payload_headers['processname']
        groupname = payload_headers['groupname']

        # Evaluate the cheap local conditions first so that ConfigDB is only
        # contacted for exits that could actually trigger a termination.
        is_critical = (processname in critical_processes or
                       groupname in critical_processes)
        if expected != 0 or not is_critical:
            continue

        # If auto-restart is enabled and a critical process exited
        # unexpectedly, terminate supervisor
        if _get_auto_restart_state() != 'enabled':
            continue

        # NOTE(review): "unxepectedly" is misspelled; the text is kept as-is
        # in case log matchers depend on the exact message - confirm before
        # correcting it.
        MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
        msg = MSG_FORMAT_STR.format(processname)
        syslog.syslog(syslog.LOG_INFO, msg)
        os.kill(os.getppid(), signal.SIGTERM)


if __name__ == "__main__":
    main()