Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dockers/docker-database/base_image_files/monit_database
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## redis_server
###############################################################################
check process redis_server matching "/usr/bin/redis-server"
if does not exist for 5 times within 5 cycles then alert
check program database|redis_server with path "/usr/bin/process_checker database redis-server /usr/bin/redis-server"
if status != 0 for 5 times within 5 cycles then alert
20 changes: 10 additions & 10 deletions dockers/docker-fpm-frr/base_image_files/monit_bgp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@
## staticd
## bgpcfgd
###############################################################################
check process zebra matching "/usr/lib/frr/zebra"
if does not exist for 5 times within 5 cycles then alert
check program bgp|zebra with path "/usr/bin/process_checker bgp zebra /usr/lib/frr/zebra"
if status != 0 for 5 times within 5 cycles then alert

check process fpmsyncd matching "fpmsyncd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd fpmsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process bgpd matching "/usr/lib/frr/bgpd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|bgpd with path "/usr/bin/process_checker bgp bgpd /usr/lib/frr/bgpd"
if status != 0 for 5 times within 5 cycles then alert

check process staticd matching "/usr/lib/frr/staticd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|staticd with path "/usr/bin/process_checker bgp staticd /usr/lib/frr/staticd"
if status != 0 for 5 times within 5 cycles then alert

check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|bgpcfgd with path "/usr/bin/process_checker bgp bgpcfgd python /usr/local/bin/bgpcfgd"
if status != 0 for 5 times within 5 cycles then alert
12 changes: 6 additions & 6 deletions dockers/docker-lldp/base_image_files/monit_lldp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
## lldp-syncd
## lldpmgrd
###############################################################################
check process lldpd_monitor matching "lldpd: "
if does not exist for 5 times within 5 cycles then alert
check program lldp|lldpd_monitor with path "/usr/bin/process_checker lldp lldpd lldpd:"
if status != 0 for 5 times within 5 cycles then alert

check process lldp_syncd matching "python2 -m lldp_syncd"
if does not exist for 5 times within 5 cycles then alert
check program lldp|lldp_syncd with path "/usr/bin/process_checker lldp lldp_syncd python2 -m lldp_syncd"
if status != 0 for 5 times within 5 cycles then alert

check process lldpmgrd matching "python /usr/bin/lldpmgrd"
if does not exist for 5 times within 5 cycles then alert
check program lldp|lldpmgrd with path "/usr/bin/process_checker lldp lldpmgrd python /usr/bin/lldpmgrd"
if status != 0 for 5 times within 5 cycles then alert
42 changes: 21 additions & 21 deletions dockers/docker-orchagent/base_image_files/monit_swss
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,33 @@
## buffermgrd
## nbrmgrd
## vxlanmgrd
###############################################################################
check process orchagent matching "/usr/bin/orchagent -d /var/log/swss"
if does not exist for 5 times within 5 cycles then alert
###############################################################################
check program swss|orchagent with path "/usr/bin/process_checker swss orchagent /usr/bin/orchagent -d /var/log/swss"
if status != 0 for 5 times within 5 cycles then alert

check process portsyncd matching "/usr/bin/portsyncd"
if does not exist for 5 times within 5 cycles then alert
check program swss|portsyncd with path "/usr/bin/process_checker swss portsyncd /usr/bin/portsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process neighsyncd matching "/usr/bin/neighsyncd"
if does not exist for 5 times within 5 cycles then alert
check program swss|neighsyncd with path "/usr/bin/process_checker swss neighsyncd /usr/bin/neighsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process vrfmgrd matching "/usr/bin/vrfmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|vrfmgrd with path "/usr/bin/process_checker swss vrfmgrd /usr/bin/vrfmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process vlanmgrd matching "/usr/bin/vlanmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|vlanmgrd with path "/usr/bin/process_checker swss vlanmgrd /usr/bin/vlanmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process intfmgrd matching "/usr/bin/intfmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|intfmgrd with path "/usr/bin/process_checker swss intfmgrd /usr/bin/intfmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process portmgrd matching "/usr/bin/portmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|portmgrd with path "/usr/bin/process_checker swss portmgrd /usr/bin/portmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process buffermgrd matching "/usr/bin/buffermgrd -l"
if does not exist for 5 times within 5 cycles then alert
check program swss|buffermgrd with path "/usr/bin/process_checker swss buffermgrd /usr/bin/buffermgrd -l"
if status != 0 for 5 times within 5 cycles then alert

check process nbrmgrd matching "/usr/bin/nbrmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|nbrmgrd with path "/usr/bin/process_checker swss nbrmgrd /usr/bin/nbrmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process vxlanmgrd matching "/usr/bin/vxlanmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|vxlanmgrd with path "/usr/bin/process_checker swss vxlanmgrd /usr/bin/vxlanmgrd"
if status != 0 for 5 times within 5 cycles then alert
4 changes: 2 additions & 2 deletions dockers/docker-sflow/base_image_files/monit_sflow
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## sflowmgrd
###############################################################################
check process sflowmgrd matching "/usr/bin/sflowmgrd"
if does not exist for 5 times within 5 cycles then alert
check program sflow|sflowmgrd with path "/usr/bin/process_checker sflow sflowmgrd /usr/bin/sflowmgrd"
if status != 0 for 5 times within 5 cycles then alert
8 changes: 4 additions & 4 deletions dockers/docker-snmp/base_image_files/monit_snmp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## snmpd
## snmpd_subagent
###############################################################################
check process snmpd matching "/usr/sbin/snmpd\s"
if does not exist for 5 times within 5 cycles then alert
check program snmp|snmpd with path "/usr/bin/process_checker snmp snmpd /usr/sbin/snmpd"
if status != 0 for 5 times within 5 cycles then alert

check process snmp_subagent matching "python3 -m sonic_ax_impl"
if does not exist for 5 times within 5 cycles then alert
check program snmp|snmp_subagent with path "/usr/bin/process_checker snmp snmp_subagent python3.6 -m sonic_ax_impl"
if status != 0 for 5 times within 5 cycles then alert
4 changes: 2 additions & 2 deletions dockers/docker-sonic-restapi/base_image_files/monit_restapi
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## restapi
###############################################################################
check process restapi matching "/usr/sbin/go-server-server"
if does not exist for 5 times within 5 cycles then alert
check program restapi|restapi with path "/usr/bin/process_checker restapi restapi /usr/sbin/go-server-server"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## telemetry
## dialout_client
###############################################################################
check process telemetry matching "/usr/sbin/telemetry"
if does not exist for 5 times within 5 cycles then alert
check program telemetry|telemetry with path "/usr/bin/process_checker telemetry telemetry /usr/sbin/telemetry"
if status != 0 for 5 times within 5 cycles then alert

check process dialout_client matching "/usr/sbin/dialout_client_cli"
if does not exist for 5 times within 5 cycles then alert
check program telemetry|dialout_client with path "/usr/bin/process_checker telemetry dialout_client /usr/sbin/dialout_client_cli"
if status != 0 for 5 times within 5 cycles then alert
11 changes: 11 additions & 0 deletions dockers/docker-teamd/base_image_files/monit_teamd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
###############################################################################
## Monit configuration for teamd container
## process list:
## teamsyncd
## teammgrd
###############################################################################
check program teamd|teamsyncd with path "/usr/bin/process_checker teamd teamsyncd /usr/bin/teamsyncd"
if status != 0 for 5 times within 5 cycles then alert

check program teamd|teammgrd with path "/usr/bin/process_checker teamd teammgrd /usr/bin/teammgrd"
if status != 0 for 5 times within 5 cycles then alert
3 changes: 3 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ sudo cp $IMAGE_CONFIGS/monit/monitrc $FILESYSTEM_ROOT/etc/monit/
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/monitrc
sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/*
# Install the process_checker script invoked by the monit "check program" entries
sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker


# Copy crontabs
sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/
Expand Down
64 changes: 64 additions & 0 deletions files/image_config/monit/process_checker
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/python
import argparse
import sys
import syslog

import psutil
import swsssdk


def check_process_existence(container_name, process_name, process_cmdline):
    """
    @summary: Check whether a process belonging to the specified container is running.

    The container is first looked up in the 'FEATURE' table of ConfigDB:
      - If the container is not listed, an error message is written into syslog
        and the function returns normally.
      - If the container is listed with state 'disabled', the script exits with
        status 0 since there is nothing to check.
      - Otherwise the process tree is scanned. If no matching process in the
        'running' or 'sleeping' state is found, a message is printed and the
        script exits with status 1.

    @param container_name: Key of the container in the 'FEATURE' table.
    @param process_name: Name compared for equality against each process name.
    @param process_cmdline: String searched for in the space-joined command line
                            of each process. An empty string disables command
                            line matching, so only the name is compared.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    feature_table = config_db.get_table("FEATURE")

    if container_name not in feature_table:
        syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!"
                      .format(container_name))
        return

    # We look into the 'FEATURE' table to verify whether the container is disabled or not.
    # If the container is disabled, we exit.
    if feature_table[container_name].get("state") == "disabled":
        sys.exit(0)

    # We leverage the psutil library to check whether the process is running or not.
    # If the process entity is found in the process tree and it is also in the 'running'
    # or 'sleeping' state, then it is considered to be running.
    for process in psutil.process_iter(["name", "cmdline", "status"]):
        try:
            current_name = process.name()

            # The script process_checker has the command line format
            # '/usr/bin/process_checker <container_name> <process_name> <process_cmdline>'
            # such as '/usr/bin/process_checker bgp fpmsyncd fpmsyncd'. So when searching
            # for the process 'fpmsyncd', we skip the process which runs process_checker
            # since it is not the 'fpmsyncd' process although 'fpmsyncd' is a substring
            # of its cmdline.
            if current_name == "process_checker":
                continue

            # An empty process_cmdline is a substring of every command line, so it
            # must not take part in the match; otherwise any process would qualify.
            matched = (process_name == current_name
                       or (process_cmdline and process_cmdline in ' '.join(process.cmdline())))

            if matched and process.status() in ["running", "sleeping"]:
                return
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process exited (or is not accessible) while we were inspecting it.
            # It cannot be the one we are looking for, so move on to the next one.
            continue

    print("'{}' is not running.".format(process_name))
    sys.exit(1)


def main():
    """
    @summary: Parse the command line arguments and run the process existence check.

    Expected invocation:
        /usr/bin/process_checker <container_name> <process_name> <process_cmdline>

    Every argument after <process_name> is collected as-is (including tokens that
    start with '-') and joined with single spaces to form the command line string
    handed to check_process_existence().
    """
    parser = argparse.ArgumentParser(
        description="Check whether the process in the specified "
                    "container is running and an alerting message will be written into syslog if it "
                    "failed to run.",
        usage="/usr/bin/process_checker <container_name> <process_name> <process_cmdline>")
    parser.add_argument("container_name", help="container name")
    parser.add_argument("process_name", help="process name")
    # REMAINDER keeps option-like tokens such as '-d' or '--diag' as part of the command line.
    parser.add_argument("process_cmdline", nargs=argparse.REMAINDER,
                        help="command line of the process")
    args = parser.parse_args()

    check_process_existence(args.container_name, args.process_name, ' '.join(args.process_cmdline))


# Run the check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## syncd
## dsserve
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert

check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
if does not exist for 5 times within 5 cycles then alert
check program syncd|dsserve with path "/usr/bin/process_checker syncd dsserve /usr/bin/dsserve /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## syncd
## dsserve
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert

check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
if does not exist for 5 times within 5 cycles then alert
check program syncd|dsserve with path "/usr/bin/process_checker syncd dsserve /usr/bin/dsserve /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
1 change: 1 addition & 0 deletions rules/docker-teamd.mk
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ $(DOCKER_TEAMD)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_TEAMD)_RUN_OPT += -v /host/warmboot:/var/warmboot

$(DOCKER_TEAMD)_BASE_IMAGE_FILES += teamdctl:/usr/bin/teamdctl
$(DOCKER_TEAMD)_BASE_IMAGE_FILES += monit_teamd:/etc/monit/conf.d
$(DOCKER_TEAMD)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)