diff --git a/dockers/docker-database/base_image_files/monit_database b/dockers/docker-database/base_image_files/monit_database index c5508922864..47c9d1b2d47 100644 --- a/dockers/docker-database/base_image_files/monit_database +++ b/dockers/docker-database/base_image_files/monit_database @@ -3,5 +3,5 @@ ## process list: ## redis_server ############################################################################### -check process redis_server matching "/usr/bin/redis-server" - if does not exist for 5 times within 5 cycles then alert +check program database|redis_server with path "/usr/bin/process_checker database /usr/bin/redis-server" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-fpm-frr/base_image_files/monit_bgp b/dockers/docker-fpm-frr/base_image_files/monit_bgp index 5dbb794c346..85696dba28b 100644 --- a/dockers/docker-fpm-frr/base_image_files/monit_bgp +++ b/dockers/docker-fpm-frr/base_image_files/monit_bgp @@ -7,17 +7,17 @@ ## staticd ## bgpcfgd ############################################################################### -check process zebra matching "/usr/lib/frr/zebra" - if does not exist for 5 times within 5 cycles then alert +check program bgp|zebra with path "/usr/bin/process_checker bgp /usr/lib/frr/zebra" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process fpmsyncd matching "fpmsyncd" - if does not exist for 5 times within 5 cycles then alert +check program bgp|fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process bgpd matching "/usr/lib/frr/bgpd" - if does not exist for 5 times within 5 cycles then alert +check program bgp|bgpd with path "/usr/bin/process_checker bgp /usr/lib/frr/bgpd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process staticd matching "/usr/lib/frr/staticd" - if does not exist for 5 times within 5 cycles then alert +check program bgp|staticd with path "/usr/bin/process_checker bgp /usr/lib/frr/staticd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process bgpcfgd matching "python /usr/local/bin/bgpcfgd" - if does not exist for 5 times within 5 cycles then alert +check program bgp|bgpcfgd with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpcfgd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-lldp/base_image_files/monit_lldp b/dockers/docker-lldp/base_image_files/monit_lldp index 200c52c7d33..8dc2f3c1532 100644 --- a/dockers/docker-lldp/base_image_files/monit_lldp +++ b/dockers/docker-lldp/base_image_files/monit_lldp @@ -5,11 +5,11 @@ ## lldp-syncd ## lldpmgrd ############################################################################### -check process lldpd_monitor matching "lldpd: " - if does not exist for 5 times within 5 cycles then alert +check program lldp|lldpd_monitor with path "/usr/bin/process_checker lldp lldpd:" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process lldp_syncd matching "python2 -m lldp_syncd" - if does not exist for 5 times within 5 cycles then alert +check program lldp|lldp_syncd with path "/usr/bin/process_checker lldp python2 -m lldp_syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process lldpmgrd matching "python /usr/bin/lldpmgrd" - if does not exist for 5 times within 5 cycles then alert +check program lldp|lldpmgrd with path "/usr/bin/process_checker lldp python /usr/bin/lldpmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-orchagent/base_image_files/monit_swss b/dockers/docker-orchagent/base_image_files/monit_swss index 5928dbd4ddb..da601011e73 100644 --- a/dockers/docker-orchagent/base_image_files/monit_swss +++ b/dockers/docker-orchagent/base_image_files/monit_swss @@ -11,33 +11,33 @@ ## buffermgrd ## nbrmgrd ## vxlanmgrd -############################################################################### -check process orchagent matching "/usr/bin/orchagent -d /var/log/swss" - if does not exist for 5 times within 5 cycles then alert +############################################################################## +check program swss|orchagent with path "/usr/bin/process_checker swss /usr/bin/orchagent -d /var/log/swss" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process portsyncd matching "/usr/bin/portsyncd" - if does not exist for 5 times within 5 cycles then alert +check program swss|portsyncd with path "/usr/bin/process_checker swss /usr/bin/portsyncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process neighsyncd matching "/usr/bin/neighsyncd" - if does not exist for 5 times within 5 cycles then alert +check program swss|neighsyncd with path "/usr/bin/process_checker swss /usr/bin/neighsyncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process vrfmgrd matching "/usr/bin/vrfmgrd" - if does not exist for 5 times within 5 cycles then alert +check program swss|vrfmgrd with path "/usr/bin/process_checker swss /usr/bin/vrfmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process vlanmgrd matching "/usr/bin/vlanmgrd" - if does not exist for 5 times within 5 cycles then alert +check program swss|vlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vlanmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process intfmgrd matching "/usr/bin/intfmgrd" - if does not exist for 5 times within 5 cycles then alert +check program swss|intfmgrd with path "/usr/bin/process_checker swss /usr/bin/intfmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process portmgrd matching "/usr/bin/portmgrd" - if does not exist for 5 times within 5 cycles then alert +check program swss|portmgrd with path "/usr/bin/process_checker swss /usr/bin/portmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process buffermgrd matching "/usr/bin/buffermgrd -l" - if does not exist for 5 times within 5 cycles then alert +check program swss|buffermgrd with path "/usr/bin/process_checker swss /usr/bin/buffermgrd -l" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process nbrmgrd matching "/usr/bin/nbrmgrd" - if does not exist for 5 times within 5 cycles then alert +check program swss|nbrmgrd with path "/usr/bin/process_checker swss /usr/bin/nbrmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process vxlanmgrd matching "/usr/bin/vxlanmgrd" - if does not exist for 5 times within 5 cycles then alert +check program swss|vxlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vxlanmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-sflow/base_image_files/monit_sflow b/dockers/docker-sflow/base_image_files/monit_sflow index d041f81001e..84b36b18ce6 100644 --- a/dockers/docker-sflow/base_image_files/monit_sflow +++ b/dockers/docker-sflow/base_image_files/monit_sflow @@ -3,5 +3,5 @@ ## process list: ## sflowmgrd ############################################################################### -check process sflowmgrd matching "/usr/bin/sflowmgrd" - if does not exist for 5 times within 5 cycles then alert +check program sflow|sflowmgrd with path "/usr/bin/process_checker sflow /usr/bin/sflowmgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-snmp/base_image_files/monit_snmp b/dockers/docker-snmp/base_image_files/monit_snmp index cfb1a2b6683..6a368a9b603 100644 --- a/dockers/docker-snmp/base_image_files/monit_snmp +++ b/dockers/docker-snmp/base_image_files/monit_snmp @@ -4,8 +4,8 @@ ## snmpd ## snmpd_subagent ############################################################################### -check process snmpd matching "/usr/sbin/snmpd\s" - if does not exist for 5 times within 5 cycles then alert +check program snmp|snmpd with path "/usr/bin/process_checker snmp /usr/sbin/snmpd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process snmp_subagent matching "python3 -m sonic_ax_impl" - if does not exist for 5 times within 5 cycles then alert +check program snmp|snmp_subagent with path "/usr/bin/process_checker snmp python3 -m sonic_ax_impl" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-sonic-restapi/base_image_files/monit_restapi b/dockers/docker-sonic-restapi/base_image_files/monit_restapi index 2e90baf30d5..6752100b84f 100644 --- a/dockers/docker-sonic-restapi/base_image_files/monit_restapi +++ b/dockers/docker-sonic-restapi/base_image_files/monit_restapi @@ -3,5 +3,5 @@ ## process list: ## restapi ############################################################################### -check process restapi matching "/usr/sbin/go-server-server" - if does not exist for 5 times within 5 cycles then alert +check program restapi|restapi with path "/usr/bin/process_checker restapi /usr/sbin/go-server-server" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry b/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry index a82c652f817..3680bbe6cf9 100644 --- a/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry +++ b/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry @@ -4,8 +4,8 @@ ## telemetry ## dialout_client ############################################################################### -check process telemetry matching "/usr/sbin/telemetry" - if does not exist for 5 times within 5 cycles then alert +check program telemetry|telemetry with path "/usr/bin/process_checker telemetry /usr/sbin/telemetry" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process dialout_client matching "/usr/sbin/dialout_client_cli" - if does not exist for 5 times within 5 cycles then alert +check program telemetry|dialout_client with path "/usr/bin/process_checker telemetry /usr/sbin/dialout_client_cli" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-teamd/base_image_files/monit_teamd b/dockers/docker-teamd/base_image_files/monit_teamd new file mode 100644 index 00000000000..626a6145604 --- /dev/null +++ b/dockers/docker-teamd/base_image_files/monit_teamd @@ -0,0 +1,11 @@ +############################################################################### +## Monit configuration for teamd container +## process list: +## teamsyncd +## teammgrd +############################################################################### +check program teamd|teamsyncd with path "/usr/bin/process_checker teamd /usr/bin/teamsyncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles + +check program teamd|teammgrd with path "/usr/bin/process_checker teamd /usr/bin/teammgrd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 737ce1271fa..9829fe94bd8 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -122,6 +122,9 @@ sudo rm -rf $FILESYSTEM_ROOT/$REDIS_DUMP_LOAD_PY2_WHEEL_NAME # Install Python module for ipaddress sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install ipaddress +# Install Python module for psutil +sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install psutil + # Install SwSS SDK Python 3 package # Note: the scripts will be overwritten by corresponding Python 2 package if [ -e {{swsssdk_py3_wheel_path}} ]; then @@ -222,6 +225,8 @@ sudo cp $IMAGE_CONFIGS/monit/monitrc $FILESYSTEM_ROOT/etc/monit/ sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/monitrc sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/ sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/* +sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/ +sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker # Copy crontabs sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/ diff --git a/files/image_config/monit/conf.d/sonic-host b/files/image_config/monit/conf.d/sonic-host index 5a67f7a9909..d191759b76e 100644 --- a/files/image_config/monit/conf.d/sonic-host +++ b/files/image_config/monit/conf.d/sonic-host @@ -6,17 +6,28 @@ ############################################################################### check filesystem root-overlay with path / - if space usage > 90% for 10 times within 20 cycles then alert + if space usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles check filesystem var-log with path /var/log - if space usage > 90% for 10 times within 20 cycles then alert + if space usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles check system $HOST - if memory usage > 90% for 10 times within 20 cycles then alert - if cpu usage (user) > 90% for 10 times within 20 cycles then alert - if cpu usage (system) > 90% for 10 times within 20 cycles then alert + if memory usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles + if cpu usage (user) > 90% for 10 times within 20 cycles then alert repeat every 1 cycles + if cpu usage (system) > 90% for 10 times within 20 cycles then alert repeat every 1 cycles check process rsyslog with pidfile /var/run/rsyslogd.pid start program = "/bin/systemctl start rsyslog.service" stop program = "/bin/systemctl stop rsyslog.service" if totalmem > 800 MB for 10 times within 20 cycles then restart + +# route_check.py Verify routes between APPL-DB & ASIC-DB are in sync. +# For any discrepancy, details are logged and a non-zero code is returned +# which would trigger a monit alert. +# Hence for any discrepancy, there will be log messages for "ERR" level +# from both route_check.py & monit. +# +check program routeCheck with path "/usr/bin/route_check.py" + every 5 cycles + if status != 0 for 3 cycle then alert repeat every 1 cycles + diff --git a/files/image_config/monit/process_checker b/files/image_config/monit/process_checker new file mode 100755 index 00000000000..ba48e37729a --- /dev/null +++ b/files/image_config/monit/process_checker @@ -0,0 +1,57 @@ +#!/usr/bin/python +import argparse +import sys +import syslog + +import psutil +import swsssdk + + +def check_process_existence(container_name, process_cmdline): + """ + @summary: Check whether the process in the specified container is running or not and + an alerting message will written into syslog if it failed to run. + """ + config_db = swsssdk.ConfigDBConnector() + config_db.connect() + feature_table = config_db.get_table("FEATURE") + + if container_name in feature_table.keys(): + # We look into the 'FEATURE' table to verify whether the container is disabled or not. + # If the container is diabled, we exit. + if ("state" in feature_table[container_name].keys() + and feature_table[container_name]["state"] == "disabled"): + sys.exit(0) + else: + # We leveraged the psutil library to help us check whether the process is running or not. + # If the process entity is found in process tree and it is also in the 'running' or 'sleeping' + # state, then it will be marked as 'running'. + is_running = False + for process in psutil.process_iter(["cmdline", "status"]): + if ((' '.join(process.cmdline())).startswith(process_cmdline) and process.status() in ["running", "sleeping"]): + is_running = True + break + + if not is_running: + # If this script is run by Monit, then the following output will be appended to + # Monit's syslog message. + print("'{}' is not running.".format(process_cmdline)) + sys.exit(1) + else: + syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!" + .format(container_name)) + + +def main(): + parser = argparse.ArgumentParser(description="Check whether the process in the specified \ + container is running and an alerting message will be written into syslog if it \ + failed to run.", usage="/usr/bin/process_checker ") + parser.add_argument("container_name", help="container name") + parser.add_argument("process_cmdline", nargs=argparse.REMAINDER, help="process command line") + args = parser.parse_args() + + check_process_existence(args.container_name, ' '.join(args.process_cmdline)) + + +if __name__ == '__main__': + main() diff --git a/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd b/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd +++ b/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd b/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd index 81c0b6ef6bc..d63346d9ee2 100644 --- a/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd +++ b/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd @@ -4,8 +4,8 @@ ## syncd ## dsserve ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd" - if does not exist for 5 times within 5 cycles then alert +check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd b/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd +++ b/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/centec/docker-syncd-centec/base_image_files/monit_syncd b/platform/centec/docker-syncd-centec/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/centec/docker-syncd-centec/base_image_files/monit_syncd +++ b/platform/centec/docker-syncd-centec/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd b/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd index 75391f90ac3..61e290e3189 100644 --- a/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd +++ b/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd @@ -3,5 +3,5 @@ ## process list: ## syncd ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd b/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd index 81c0b6ef6bc..d63346d9ee2 100644 --- a/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd +++ b/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd @@ -4,8 +4,8 @@ ## syncd ## dsserve ############################################################################### -check process syncd matching "/usr/bin/syncd\s" - if does not exist for 5 times within 5 cycles then alert +check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles -check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd" - if does not exist for 5 times within 5 cycles then alert +check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd" + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/rules/docker-teamd.mk b/rules/docker-teamd.mk index 49c9dde9ba1..b1203914bf5 100644 --- a/rules/docker-teamd.mk +++ b/rules/docker-teamd.mk @@ -29,4 +29,5 @@ $(DOCKER_TEAMD)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_TEAMD)_RUN_OPT += -v /host/warmboot:/var/warmboot $(DOCKER_TEAMD)_BASE_IMAGE_FILES += teamdctl:/usr/bin/teamdctl +$(DOCKER_TEAMD)_BASE_IMAGE_FILES += monit_teamd:/etc/monit/conf.d $(DOCKER_TEAMD)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) diff --git a/src/monit/patch/0002-change_monit_alert_log_error.patch b/src/monit/patch/0002-change_monit_alert_log_error.patch new file mode 100644 index 00000000000..1e43078e621 --- /dev/null +++ b/src/monit/patch/0002-change_monit_alert_log_error.patch @@ -0,0 +1,64 @@ +From 97a5defc6a7fcc6a00f691bb5314ceb8fb7704e9 Mon Sep 17 00:00:00 2001 +From: Abhishek Dosi +Date: Mon, 26 Oct 2020 11:40:02 -0700 +Subject: [PATCH] Patch on top of commit Patch is addressing these changes:- + +a) Enable repeat keyword for alert action . Using this we can log +syslog error message for persistent failure condition + +b) Make sure error message is loggged if state is changed to fail first time (fault tolerance condition) +or we have repeat clause for alert + +Signed-off-by: Abhishek Dosi + +--- + src/event.c | 6 +++++- + src/p.y | 8 +++++++- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/src/event.c b/src/event.c +index ed363ee..9d08fc0 100644 +--- a/src/event.c ++++ b/src/event.c +@@ -336,7 +336,8 @@ static void _handleEvent(Service_T S, Event_T E) { + if (E->state != State_Init || E->state_map & 0x1) { + if (E->state == State_Succeeded || E->state == State_ChangedNot || E->id == Event_Instance || E->id == Event_Action) + LogInfo("'%s' %s\n", S->name, E->message); +- else ++ /* Send Error log if state change to failed for 1st time or if we have repeat clause then do periodically */ ++ else if ((E->state_changed) || (E->state == State_Failed && E->action->failed->repeat && E->count % E->action->failed->repeat == 0)) + LogError("'%s' %s\n", S->name, E->message); + } + if (E->state == State_Init) + return; +diff --git a/src/p.y b/src/p.y +index a57807d..b46b1a1 100644 +--- a/src/p.y ++++ b/src/p.y +@@ -2250,9 +2250,12 @@ repeat : /* EMPTY */ { + } + ; + +-action : ALERT { ++action : ALERT repeat{ + $$ = Action_Alert; + } ++ | ALERT { ++ $$ = Action_Alert; ++ } + | EXEC argumentlist repeat { + $$ = Action_Exec; + } +@@ -2281,6 +2284,9 @@ action1 : action { + repeat = 0; + command1 = command; + command = NULL; ++ } else if ($1 == Action_Alert) { ++ repeat1 = repeat; ++ repeat = 0; + } + } + ; +-- +2.17.1 + diff --git a/src/monit/patch/series b/src/monit/patch/series index 15fcdd50c8a..f5534d0f554 100644 --- a/src/monit/patch/series +++ b/src/monit/patch/series @@ -1,2 +1,3 @@ # This series applies on GIT commit dc9bc1c949125140d967edfc598dfad47eedc552 0001-used_system_memory_sysdep-Use-MemAvailable-value-if-.patch +0002-change_monit_alert_log_error.patch