From cf3048c4a85bd03f30251f43de68320a20f8834a Mon Sep 17 00:00:00 2001
From: Sridhar Ravindran
Date: Thu, 26 Sep 2019 11:17:23 -0700
Subject: [PATCH] [DELL] S6100, Z9100 Last Reboot Reason Watchdog, Thermal
 Support in November build

---
Review notes (everything between the "---" line above and the first
"diff --git" line is commentary and is not part of the commit):

* Purpose: persist the cause of the last reboot in the SMF mailbox register
  mb_poweron_reason and in /host/reboot-cause/platform/reboot_reason, and
  map the register value to a ChassisBase.REBOOT_CAUSE_* constant.

* mb_poweron_reason values written by the scripts in this patch:
    0xcc  fastboot_plugin, run by platform_reboot before the reset write
    0x01  track_reboot_reason.sh: register read "ff" and
          /tmp/notify_firstboot_to_platform exists
    0xbb  track_reboot_reason.sh: register read "ff", no first-boot marker
    0xee  track_reboot_reason.sh: track_thermal returned 1
    0xdd  track_reboot_reason.sh: track_watchdog returned 1
    0xaa  track_reboot_reason.sh: register still read "cc"
    0x99  track_reboot_reason.sh: none of the above

* Fixes applied on top of the original submission:
  - s6100_platform.sh / z9100_platform.sh: "return status" handed the
    literal word "status" to the return builtin (non-numeric argument);
    now "status=$?" followed by "return $status".
  - common/platform_reboot: subprocess.check_output() raises when the
    plugin exits non-zero or is missing, which aborted the script before
    the 0xcf9 reset write.  Tracking is now best effort (subprocess.call,
    OSError reported on stderr) and the unused "res" local is gone.
  - s6100 and z9100 chassis.py: the 0xee branch indexed power_reason_dict
    without a membership test (KeyError on an unknown value); it now falls
    back to REBOOT_CAUSE_HARDWARE_OTHER.
  - z9100 chassis.py: the first-boot check compared power_reason with 0x11
    (17) although power_reason is parsed with base-10 int() and the rest of
    the patch tests the same registers against 11; now "== 11".
    NOTE(review): confirm against the SMF driver's sysfs output format.
  - rc.local: comment now names the directory/file the code actually uses.
  - track_reboot_reason.sh: "$(echo $?)" replaced by "$?".

* Caveats for whoever applies this:
  - Leading indentation was reconstructed (4 spaces assumed); context and
    removed lines must match the target tree, so run "git apply --check"
    first (or apply with --ignore-whitespace).
  - The post-image blob ids on the "index" lines were not recomputed for
    the files edited above.

 files/image_config/platform/rc.local          |   7 ++
 .../common/platform_reboot                    |   9 ++
 .../debian/platform-modules-s6100.install     |   3 +
 .../debian/platform-modules-z9100.install     |   3 +
 .../s6100/scripts/fastboot_plugin             |   6 +
 .../s6100/scripts/s6100_platform.sh           |  18 +--
 .../s6100/scripts/track_reboot_reason.sh      | 111 ++++++++++++++++++
 .../s6100/sonic_platform/chassis.py           |  13 +-
 .../z9100/scripts/fastboot_plugin             |   6 +
 .../z9100/scripts/track_reboot_reason.sh      | 111 ++++++++++++++++++
 .../z9100/scripts/z9100_platform.sh           |   7 ++
 .../z9100/sonic_platform/chassis.py           |  32 ++++-
 12 files changed, 302 insertions(+), 24 deletions(-)
 create mode 100755 platform/broadcom/sonic-platform-modules-dell/s6100/scripts/fastboot_plugin
 create mode 100755 platform/broadcom/sonic-platform-modules-dell/s6100/scripts/track_reboot_reason.sh
 create mode 100755 platform/broadcom/sonic-platform-modules-dell/z9100/scripts/fastboot_plugin
 create mode 100755 platform/broadcom/sonic-platform-modules-dell/z9100/scripts/track_reboot_reason.sh

diff --git a/files/image_config/platform/rc.local b/files/image_config/platform/rc.local
index 5ea98a8eccd..b5ed9286607 100755
--- a/files/image_config/platform/rc.local
+++ b/files/image_config/platform/rc.local
@@ -238,6 +238,13 @@ if [ -f $FIRST_BOOT_FILE ]; then
     # Notify firstboot to Platform, to use it for reboot-cause
     touch /tmp/notify_firstboot_to_platform
 
+    # create the /host/reboot-cause/platform directory;
+    # /host/reboot-cause/platform/reboot_reason
+    # will be used to track last reboot reason
+    if [ ! -d /host/reboot-cause/platform ]; then
+        mkdir -p /host/reboot-cause/platform
+    fi
+
     if [ -d /host/image-$SONIC_VERSION/platform/$platform ]; then
         dpkg -i /host/image-$SONIC_VERSION/platform/$platform/*.deb
     fi
diff --git a/platform/broadcom/sonic-platform-modules-dell/common/platform_reboot b/platform/broadcom/sonic-platform-modules-dell/common/platform_reboot
index 3e165630658..81d272326ec 100755
--- a/platform/broadcom/sonic-platform-modules-dell/common/platform_reboot
+++ b/platform/broadcom/sonic-platform-modules-dell/common/platform_reboot
@@ -2,9 +2,17 @@
 import sys
 import os
 import struct
+import subprocess
 
 PORT_RES = '/dev/port'
 
+def track_reboot():
+    # Run plugin script which will track the cli triggered reboot, fastboot, warmboot.
+    # Best effort: a missing or failing plugin must not prevent the reboot itself.
+    try:
+        subprocess.call(['/usr/local/bin/fastboot_plugin'])
+    except OSError as err:
+        sys.stderr.write("track_reboot: %s\n" % err)
 
 def portio_reg_write(resource, offset, val):
     fd = os.open(resource, os.O_RDWR)
@@ -21,5 +29,6 @@ def portio_reg_write(resource, offset, val):
     os.close(fd)
 
 if __name__ == "__main__":
+    track_reboot()
     portio_reg_write(PORT_RES, 0xcf9, 0xe)
 
diff --git a/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-s6100.install b/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-s6100.install
index 054d44bb8a9..9c802010d04 100644
--- a/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-s6100.install
+++ b/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-s6100.install
@@ -5,6 +5,9 @@ common/io_rd_wr.py usr/local/bin
 common/fstrim.timer etc/systemd/system
 common/fstrim.service etc/systemd/system
 common/platform_reboot usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
+s6100/scripts/fastboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
+s6100/scripts/fastboot_plugin usr/local/bin
+s6100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
 s6100/scripts/platform_sensors.py usr/local/bin
 s6100/scripts/sensors usr/bin
 s6100/systemd/platform-modules-s6100.service etc/systemd/system
diff --git a/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-z9100.install b/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-z9100.install
index 08f27cce47d..2066fa1b837 100644
--- a/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-z9100.install
+++ b/platform/broadcom/sonic-platform-modules-dell/debian/platform-modules-z9100.install
@@ -2,6 +2,9 @@ z9100/scripts/check_qsfp.sh usr/local/bin
 z9100/scripts/z9100_platform.sh usr/local/bin
 common/dell_i2c_utils.sh usr/local/bin
 common/io_rd_wr.py usr/local/bin
+z9100/scripts/fastboot_plugin usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
+z9100/scripts/fastboot_plugin usr/local/bin
+z9100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
 common/fstrim.timer etc/systemd/system
 common/fstrim.service etc/systemd/system
 common/platform_reboot usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
diff --git a/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/fastboot_plugin b/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/fastboot_plugin
new file mode 100755
index 00000000000..d385be3bc68
--- /dev/null
+++ b/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/fastboot_plugin
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
+    cd /sys/devices/platform/SMF.512/hwmon/*
+    echo 0xcc > mb_poweron_reason
+fi
diff --git a/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_platform.sh b/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_platform.sh
index 974d0662d56..3d0e0cefd5e 100755
--- a/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_platform.sh
+++ b/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_platform.sh
@@ -218,21 +218,9 @@ reset_muxes() {
 }
 
 track_reboot_reason() {
-    if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
-        rv=$(cd /sys/devices/platform/SMF.512/hwmon/*; cat mb_poweron_reason)
-        reason=$(echo $rv | cut -d 'x' -f2)
-        if [ $reason == "ff" ]; then
-            cd /sys/devices/platform/SMF.512/hwmon/*
-            if [[ -e /tmp/notify_firstboot_to_platform ]]; then
-                echo 0x01 > mb_poweron_reason
-            else
-                echo 0xbb > mb_poweron_reason
-            fi
-        elif [ $reason == "bb" ] || [ $reason == "1" ]; then
-            cd /sys/devices/platform/SMF.512/hwmon/*
-            echo 0xaa > mb_poweron_reason
-        fi
-    fi
+    /usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/track_reboot_reason.sh
+    status=$?
+    return $status
 }
 
 install_python_api_package() {
diff --git a/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/track_reboot_reason.sh b/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/track_reboot_reason.sh
new file mode 100755
index 00000000000..71fff409ad0
--- /dev/null
+++ b/platform/broadcom/sonic-platform-modules-dell/s6100/scripts/track_reboot_reason.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+syslog_found=false
+syslog_1_found=false
+reboot_dir_found=false
+reboot_file_found=false
+
+if [[ -f /var/log/syslog ]]; then
+    syslog_found=true
+fi
+
+if [[ -f /var/log/syslog.1 ]]; then
+    syslog_1_found=true
+fi
+
+if [[ -d /host/reboot-cause/platform ]]; then
+    reboot_dir_found=true
+    if [[ -f /host/reboot-cause/platform/reboot_reason ]]; then
+        reboot_file_found=true
+    fi
+fi
+
+track_thermal() {
+    prev_thermal=$(cat /host/reboot-cause/platform/reboot_reason | grep thermal | cut -d ' ' -f2-4)
+    curr_poweron_reason=$(cd /sys/devices/platform/SMF.512/hwmon/*; cat smf_poweron_reason)
+    if [[ $curr_poweron_reason = "11" ]]; then
+        return 0
+    fi
+    if [[ $prev_thermal = $curr_poweron_reason ]]; then
+        return 0
+    else
+        sed -i "2s/.*/thermal $curr_poweron_reason/" /host/reboot-cause/platform/reboot_reason
+        return 1
+    fi
+
+    return 0
+}
+
+track_watchdog()
+{
+    if [[ $syslog_found = true ]]; then
+        wd_log="$(tac /var/log/syslog | grep -a "watchdog did not stop" | grep -a "CRIT kernel")"
+        if [[ $wd_log = "" ]] && [[ $syslog_1_found = true ]]; then
+            wd_log="$(tac /var/log/syslog.1 | grep -a "watchdog did not stop" | grep -a "CRIT kernel")"
+        fi
+    fi
+
+    if [[ $wd_log = "" ]]; then
+        return 0
+    fi
+
+    if [[ $reboot_dir_found = true ]]; then
+        echo "$wd_log" > /host/reboot-cause/platform/tmp
+        wd_log=$(head -n 1 /host/reboot-cause/platform/tmp)
+        wd_timestamp=$(echo $wd_log | cut -d ' ' -f1-3)
+        wd_date=$(echo $wd_timestamp | cut -d '.' -f1)
+
+        if [[ $reboot_file_found = true ]]; then
+            prev_wd=$(cat /host/reboot-cause/platform/reboot_reason | grep watchdog | cut -d ' ' -f2-4)
+
+            if [[ $prev_wd = $wd_date ]]; then
+                rm -rf /host/reboot-cause/platform/tmp
+                return 0
+            else
+                sed -i "1s/.*/watchdog $wd_date/" /host/reboot-cause/platform/reboot_reason
+                rm -rf /host/reboot-cause/platform/tmp
+                return 1
+            fi
+        fi
+    fi
+}
+
+track_reboot_reason() {
+    if [[ $reboot_file_found = false ]]; then
+        echo "watchdog None" > /host/reboot-cause/platform/reboot_reason
+        echo "thermal None" >> /host/reboot-cause/platform/reboot_reason
+    fi
+
+    if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
+        track_thermal
+        is_thermal_reboot=$?
+
+        track_watchdog
+        is_wd_reboot=$?
+
+        rv=$(cd /sys/devices/platform/SMF.512/hwmon/*; cat mb_poweron_reason)
+        reason=$(echo $rv | cut -d 'x' -f2)
+        if [[ $reason = "ff" ]]; then
+            sed -i "2s/.*/thermal None/" /host/reboot-cause/platform/reboot_reason
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            if [[ -e /tmp/notify_firstboot_to_platform ]]; then
+                echo 0x01 > mb_poweron_reason
+            else
+                echo 0xbb > mb_poweron_reason
+            fi
+        elif [[ $is_thermal_reboot = 1 ]]; then
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0xee > mb_poweron_reason
+        elif [[ $is_wd_reboot = 1 ]]; then
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0xdd > mb_poweron_reason
+        elif [[ $reason = "cc" ]]; then
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0xaa > mb_poweron_reason
+        else
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0x99 > mb_poweron_reason
+        fi
+    fi
+}
+track_reboot_reason
diff --git a/platform/broadcom/sonic-platform-modules-dell/s6100/sonic_platform/chassis.py b/platform/broadcom/sonic-platform-modules-dell/s6100/sonic_platform/chassis.py
index ea57f368914..e0afe535792 100755
--- a/platform/broadcom/sonic-platform-modules-dell/s6100/sonic_platform/chassis.py
+++ b/platform/broadcom/sonic-platform-modules-dell/s6100/sonic_platform/chassis.py
@@ -89,11 +89,16 @@ def get_reboot_cause(self):
             if (power_reason in self.power_reason_dict):
                 return (self.power_reason_dict[power_reason], None)
         else:
-            if ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
+            if (smf_mb_reg_reason == 0xaa):
+                return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
+            elif ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
                 return (ChassisBase.REBOOT_CAUSE_POWER_LOSS, None)
-
-            if (reset_reason in self.reset_reason_dict):
-                return (self.reset_reason_dict[reset_reason], None)
+            elif (smf_mb_reg_reason == 0xdd):
+                return (ChassisBase.REBOOT_CAUSE_WATCHDOG, None)
+            elif (smf_mb_reg_reason == 0xee):
+                return (self.power_reason_dict.get(power_reason, ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER), None)
+            else:
+                return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
 
         return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason")
 
diff --git a/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/fastboot_plugin b/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/fastboot_plugin
new file mode 100755
index 00000000000..d385be3bc68
--- /dev/null
+++ b/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/fastboot_plugin
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
+    cd /sys/devices/platform/SMF.512/hwmon/*
+    echo 0xcc > mb_poweron_reason
+fi
diff --git a/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/track_reboot_reason.sh b/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/track_reboot_reason.sh
new file mode 100755
index 00000000000..71fff409ad0
--- /dev/null
+++ b/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/track_reboot_reason.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+syslog_found=false
+syslog_1_found=false
+reboot_dir_found=false
+reboot_file_found=false
+
+if [[ -f /var/log/syslog ]]; then
+    syslog_found=true
+fi
+
+if [[ -f /var/log/syslog.1 ]]; then
+    syslog_1_found=true
+fi
+
+if [[ -d /host/reboot-cause/platform ]]; then
+    reboot_dir_found=true
+    if [[ -f /host/reboot-cause/platform/reboot_reason ]]; then
+        reboot_file_found=true
+    fi
+fi
+
+track_thermal() {
+    prev_thermal=$(cat /host/reboot-cause/platform/reboot_reason | grep thermal | cut -d ' ' -f2-4)
+    curr_poweron_reason=$(cd /sys/devices/platform/SMF.512/hwmon/*; cat smf_poweron_reason)
+    if [[ $curr_poweron_reason = "11" ]]; then
+        return 0
+    fi
+    if [[ $prev_thermal = $curr_poweron_reason ]]; then
+        return 0
+    else
+        sed -i "2s/.*/thermal $curr_poweron_reason/" /host/reboot-cause/platform/reboot_reason
+        return 1
+    fi
+
+    return 0
+}
+
+track_watchdog()
+{
+    if [[ $syslog_found = true ]]; then
+        wd_log="$(tac /var/log/syslog | grep -a "watchdog did not stop" | grep -a "CRIT kernel")"
+        if [[ $wd_log = "" ]] && [[ $syslog_1_found = true ]]; then
+            wd_log="$(tac /var/log/syslog.1 | grep -a "watchdog did not stop" | grep -a "CRIT kernel")"
+        fi
+    fi
+
+    if [[ $wd_log = "" ]]; then
+        return 0
+    fi
+
+    if [[ $reboot_dir_found = true ]]; then
+        echo "$wd_log" > /host/reboot-cause/platform/tmp
+        wd_log=$(head -n 1 /host/reboot-cause/platform/tmp)
+        wd_timestamp=$(echo $wd_log | cut -d ' ' -f1-3)
+        wd_date=$(echo $wd_timestamp | cut -d '.' -f1)
+
+        if [[ $reboot_file_found = true ]]; then
+            prev_wd=$(cat /host/reboot-cause/platform/reboot_reason | grep watchdog | cut -d ' ' -f2-4)
+
+            if [[ $prev_wd = $wd_date ]]; then
+                rm -rf /host/reboot-cause/platform/tmp
+                return 0
+            else
+                sed -i "1s/.*/watchdog $wd_date/" /host/reboot-cause/platform/reboot_reason
+                rm -rf /host/reboot-cause/platform/tmp
+                return 1
+            fi
+        fi
+    fi
+}
+
+track_reboot_reason() {
+    if [[ $reboot_file_found = false ]]; then
+        echo "watchdog None" > /host/reboot-cause/platform/reboot_reason
+        echo "thermal None" >> /host/reboot-cause/platform/reboot_reason
+    fi
+
+    if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
+        track_thermal
+        is_thermal_reboot=$?
+
+        track_watchdog
+        is_wd_reboot=$?
+
+        rv=$(cd /sys/devices/platform/SMF.512/hwmon/*; cat mb_poweron_reason)
+        reason=$(echo $rv | cut -d 'x' -f2)
+        if [[ $reason = "ff" ]]; then
+            sed -i "2s/.*/thermal None/" /host/reboot-cause/platform/reboot_reason
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            if [[ -e /tmp/notify_firstboot_to_platform ]]; then
+                echo 0x01 > mb_poweron_reason
+            else
+                echo 0xbb > mb_poweron_reason
+            fi
+        elif [[ $is_thermal_reboot = 1 ]]; then
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0xee > mb_poweron_reason
+        elif [[ $is_wd_reboot = 1 ]]; then
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0xdd > mb_poweron_reason
+        elif [[ $reason = "cc" ]]; then
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0xaa > mb_poweron_reason
+        else
+            cd /sys/devices/platform/SMF.512/hwmon/*
+            echo 0x99 > mb_poweron_reason
+        fi
+    fi
+}
+track_reboot_reason
diff --git a/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/z9100_platform.sh b/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/z9100_platform.sh
index 276acd882f1..d67cd7bbc8f 100755
--- a/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/z9100_platform.sh
+++ b/platform/broadcom/sonic-platform-modules-dell/z9100/scripts/z9100_platform.sh
@@ -200,6 +200,12 @@ init_switch_port_led() {
 
 }
 
+track_reboot_reason() {
+    /usr/share/sonic/device/x86_64-dell_z9100_c2538-r0/track_reboot_reason.sh
+    status=$?
+    return $status
+}
+
 install_python_api_package() {
     device="/usr/share/sonic/device"
     platform=$(/usr/local/bin/sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
@@ -222,6 +228,7 @@ if [[ "$1" == "init" ]]; then
     modprobe dell_ich
     modprobe dell_mailbox
     modprobe dell_z9100_cpld
+    track_reboot_reason
 
     cpu_board_mux "new_device"
     switch_board_mux "new_device"
diff --git a/platform/broadcom/sonic-platform-modules-dell/z9100/sonic_platform/chassis.py b/platform/broadcom/sonic-platform-modules-dell/z9100/sonic_platform/chassis.py
index 6245f8efdc0..aef60bec129 100755
--- a/platform/broadcom/sonic-platform-modules-dell/z9100/sonic_platform/chassis.py
+++ b/platform/broadcom/sonic-platform-modules-dell/z9100/sonic_platform/chassis.py
@@ -39,7 +39,7 @@ class Chassis(ChassisBase):
     def __init__(self):
         ChassisBase.__init__(self)
 
-    def get_pmc_register(self, reg_name):
+    def _get_pmc_register(self, reg_name):
         rv = 'ERR'
         mb_reg_file = self.MAILBOX_DIR+'/'+reg_name
 
@@ -57,12 +57,23 @@ def get_pmc_register(self, reg_name):
             rv = rv.lstrip(" ")
         return rv
 
+    def _get_reboot_reason_smf_register(self):
+        # Returns 0xAA on software reload
+        # Returns 0xFF on power-cycle
+        # Returns 0x01 on first-boot
+        smf_mb_reg_reason = self._get_pmc_register('mb_poweron_reason')
+        return int(smf_mb_reg_reason, 16)
+
     def get_reboot_cause(self):
         """
         Retrieves the cause of the previous reboot
         """
-        reset_reason = int(self.get_pmc_register('smf_reset_reason'))
-        power_reason = int(self.get_pmc_register('smf_poweron_reason'))
+        reset_reason = int(self._get_pmc_register('smf_reset_reason'))
+        power_reason = int(self._get_pmc_register('smf_poweron_reason'))
+        smf_mb_reg_reason = self._get_reboot_reason_smf_register()
+
+        if ((smf_mb_reg_reason == 0x01) and (power_reason == 11)):
+            return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
 
         # Reset_Reason = 11 ==> PowerLoss
         # So return the reboot reason from Last Power_Reason Dictionary
@@ -71,12 +82,23 @@ def get_reboot_cause(self):
         # checking key presence in dictionary else return
         # REBOOT_CAUSE_HARDWARE_OTHER as the Power_Reason and Reset_Reason
         # registers returned invalid data
+
+        # In Z9100, if Reset_Reason is not 11 and smf_mb_reg_reason
+        # is ff or bb, then it is PowerLoss
         if (reset_reason == 11):
             if (power_reason in self.power_reason_dict):
                 return (self.power_reason_dict[power_reason], None)
         else:
-            if (reset_reason in self.reset_reason_dict):
-                return (self.reset_reason_dict[reset_reason], None)
+            if (smf_mb_reg_reason == 0xaa):
+                return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
+            elif ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
+                return (ChassisBase.REBOOT_CAUSE_POWER_LOSS, None)
+            elif (smf_mb_reg_reason == 0xdd):
+                return (ChassisBase.REBOOT_CAUSE_WATCHDOG, None)
+            elif (smf_mb_reg_reason == 0xee):
+                return (self.power_reason_dict.get(power_reason, ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER), None)
+            else:
+                return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
 
         return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason")
 