Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Run cleanup when the shell exits (0) and on signals 1 2 3 6 10 11 13 15
# (on Linux/x86: HUP INT QUIT ABRT USR1 SEGV PIPE TERM), so the sensor
# temp1_max thresholds are written back before the script goes away.
# Signal 9 (SIGKILL) is intentionally not listed: it can never be caught,
# so naming it in a trap has no effect.
trap 'cleanup' 0 1 2 3 6 10 11 13 15

# Fan level currently applied. 99 matches none of the levels 0-4 tested in
# monitor_temp_sensors, so the first pass always programs a fan speed.
LEVEL=99
# NOTE(review): presumably the polling period in seconds; its use is outside
# the part of the script visible here - confirm.
INTERVAL=5
Expand All @@ -14,11 +15,27 @@ LEVEL3=16000
# Fan speed (RPM) used for the two highest levels
LEVEL4=19000
LEVEL5=19000

# File that receives the "Thermal Overload" marker before an
# over-temperature shutdown
LRR_FILE="/host/reboot-cause/reboot-cause.txt"
I2C_ADAPTER="/sys/class/i2c-adapter/i2c-2/i2c-11"

# Current-temperature attribute of each sensor. The hwmon* component is a
# glob that is only resolved where the variable is expanded unquoted.
SENSOR1="${I2C_ADAPTER}/11-004c/hwmon/hwmon*/temp1_input"
SENSOR2="${I2C_ADAPTER}/11-004d/hwmon/hwmon*/temp1_input"
SENSOR3="${I2C_ADAPTER}/11-004e/hwmon/hwmon*/temp1_input"

# Maximum-temperature threshold attribute of each sensor
SENSOR1_MAX="${I2C_ADAPTER}/11-004c/hwmon/hwmon*/temp1_max"
SENSOR2_MAX="${I2C_ADAPTER}/11-004d/hwmon/hwmon*/temp1_max"
SENSOR3_MAX="${I2C_ADAPTER}/11-004e/hwmon/hwmon*/temp1_max"

# Thresholds as found at start-up; cleanup writes these back
SENSOR1_MAX_VAL=$(cat ${SENSOR1_MAX})
SENSOR2_MAX_VAL=$(cat ${SENSOR2_MAX})
SENSOR3_MAX_VAL=$(cat ${SENSOR3_MAX})

# Raised thresholds: original + 5000, minus 63 so that a value written by
# this script can be told apart from the pmon sensors configuration settings
SENSOR1_NEW_MAX=$(expr $(echo ${SENSOR1_MAX_VAL}) + 5000 - 63)
SENSOR2_NEW_MAX=$(expr $(echo ${SENSOR2_MAX_VAL}) + 5000 - 63)
SENSOR3_NEW_MAX=$(expr $(echo ${SENSOR3_MAX_VAL}) + 5000 - 63)

# Three fan trays, each holding two separate fans,
# paired as fan1-fan4 fan2-fan5 fan3-fan6
FANTRAY1_FAN1=${I2C_ADAPTER}/11-0029/fan1_target
Expand Down Expand Up @@ -46,6 +63,14 @@ function check_module
fi
}

function cleanup
{
    # Write the saved thresholds (SENSORn_MAX_VAL) back to each sensor's
    # temp1_max attribute (SENSORn_MAX), then leave the script with status 1.
    local n val_name path_name
    for n in 1 2 3
    do
        val_name="SENSOR${n}_MAX_VAL"
        path_name="SENSOR${n}_MAX"
        # The redirect target stays unquoted on purpose: the stored path
        # contains a hwmon* glob that has to be resolved here.
        echo ${!val_name} > ${!path_name}
    done
    exit 1
}

function check_faulty_fan
{

Expand Down Expand Up @@ -123,56 +148,95 @@ function update_fan_speed

function monitor_temp_sensors
{
    # Keep the sensors' temp1_max thresholds raised, then loop: pick a fan
    # speed level from the average of the three sensors and shut the box
    # down when any raw reading reaches its original threshold.
    #
    # Globals read:    SENSORn, SENSORn_MAX, LRR_FILE, IDLE, LEVEL1..LEVEL4
    # Globals written: SENSORn_CUR_MAX_VAL, SENSORn_NEW_MAX, SENSORn_MAX_VAL,
    #                  LEVEL, sensor1..3, s1..3, sum, sensor_temp
    # Calls:           update_fan_speed, check_faulty_fan, logger
    #
    # NOTE(review): the sensor reads and the level-selection chain appear
    # twice inside the loop, and the loop has no sleep. This looks like the
    # removed and added lines of a diff rendered together - confirm against
    # the real file. The structure is kept unchanged here.

    # If temp1_max no longer holds the value this script wrote, something
    # else changed it: take the current value as the new baseline, raise it
    # again (+5000 - 63) and write the raised value back.
    SENSOR1_CUR_MAX_VAL=$(cat $SENSOR1_MAX)
    SENSOR2_CUR_MAX_VAL=$(cat $SENSOR2_MAX)
    SENSOR3_CUR_MAX_VAL=$(cat $SENSOR3_MAX)
    if [ "$SENSOR1_CUR_MAX_VAL" -ne "$SENSOR1_NEW_MAX" ]
    then
        SENSOR1_NEW_MAX=$(expr `echo $SENSOR1_CUR_MAX_VAL` + 5000 - 63)
        SENSOR1_MAX_VAL=$SENSOR1_CUR_MAX_VAL
        echo $SENSOR1_NEW_MAX > $SENSOR1_MAX
    fi
    if [ "$SENSOR2_CUR_MAX_VAL" -ne "$SENSOR2_NEW_MAX" ]
    then
        SENSOR2_NEW_MAX=$(expr `echo $SENSOR2_CUR_MAX_VAL` + 5000 - 63)
        SENSOR2_MAX_VAL=$SENSOR2_CUR_MAX_VAL
        echo $SENSOR2_NEW_MAX > $SENSOR2_MAX
    fi
    if [ "$SENSOR3_CUR_MAX_VAL" -ne "$SENSOR3_NEW_MAX" ]
    then
        SENSOR3_NEW_MAX=$(expr `echo $SENSOR3_CUR_MAX_VAL` + 5000 - 63)
        SENSOR3_MAX_VAL=$SENSOR3_CUR_MAX_VAL
        echo $SENSOR3_NEW_MAX > $SENSOR3_MAX
    fi

    while true # go through all temp sensor outputs
    do
        # Readings are scaled down by 1000 before averaging
        sensor1=$(expr `echo $(cat $SENSOR1)` / 1000)
        sensor2=$(expr `echo $(cat $SENSOR2)` / 1000)
        sensor3=$(expr `echo $(cat $SENSOR3)` / 1000)
        sum=$(($sensor1 + $sensor2 + $sensor3))
        sensor_temp=$(($sum/3))

        # A level is only (re)programmed when it differs from the current
        # LEVEL, so a steady temperature does not rewrite the fan speed.
        if [ "$sensor_temp" -le "25" ] && [ "$LEVEL" -ne "0" ]
        then
            # Set Fan Speed to 7000 RPM
            LEVEL=0
            update_fan_speed $IDLE
            logger "Adjusted FAN Speed to $IDLE RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "26" ] && [ "$sensor_temp" -le "44" ] && [ "$LEVEL" -ne "1" ]
        then
            # Set Fan Speed to 10000 RPM
            LEVEL=1
            update_fan_speed $LEVEL1
            # Log the speed actually applied (was $IDLE for every level)
            logger "Adjusted FAN Speed to $LEVEL1 RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "45" ] && [ "$sensor_temp" -le "59" ] && [ "$LEVEL" -ne "2" ]
        then
            # Set Fan Speed to 13000 RPM
            LEVEL=2
            update_fan_speed $LEVEL2
            logger "Adjusted FAN Speed to $LEVEL2 RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "60" ] && [ "$sensor_temp" -le "79" ] && [ "$LEVEL" -ne "3" ]
        then
            # Set Fan Speed to 16000 RPM
            LEVEL=3
            update_fan_speed $LEVEL3
            logger "Adjusted FAN Speed to $LEVEL3 RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "80" ] && [ "$LEVEL" -ne "4" ]
        then
            # Set Fan Speed to 19000 RPM
            LEVEL=4
            update_fan_speed $LEVEL4
            logger "Adjusted FAN Speed to $LEVEL4 RPM against $sensor_temp Temperature"
        fi
        # go through all temp sensor outputs
        sensor1=$(expr `echo $(cat $SENSOR1)` / 1000)
        sensor2=$(expr `echo $(cat $SENSOR2)` / 1000)
        sensor3=$(expr `echo $(cat $SENSOR3)` / 1000)
        # All sensors output in 1000's
        s1=$(cat $SENSOR1)
        s2=$(cat $SENSOR2)
        s3=$(cat $SENSOR3)

        # Compare the raw readings with the original (un-raised) thresholds
        if [ "$s1" -ge "$SENSOR1_MAX_VAL" ] || [ "$s2" -ge "$SENSOR2_MAX_VAL" ] || [ "$s3" -ge "$SENSOR3_MAX_VAL" ]
        then
            # Thermal trip is about to happen: record the cause so it can
            # be reported after the next boot
            echo "Thermal Overload $sensor1 $sensor2 $sensor3" > $LRR_FILE
            logger "Shutting down due to over temperature ($sensor1 degree, $sensor2 degree, $sensor3 degree)"
            sync
            sleep 1 # Give time to send logger message to server
            # Assigning the original max values back in sensors
            # (presumably so the hardware trips on its own - confirm)
            echo $SENSOR1_MAX_VAL > $SENSOR1_MAX
            echo $SENSOR2_MAX_VAL > $SENSOR2_MAX
            echo $SENSOR3_MAX_VAL > $SENSOR3_MAX

            ### Not Reached ###
            # In case if HW fails to shutdown
            /sbin/shutdown -P now
        fi
        sum=$(($sensor1 + $sensor2 + $sensor3))
        sensor_temp=$(($sum/3))

        if [ "$sensor_temp" -le "25" ] && [ "$LEVEL" -ne "0" ]
        then
            # Set Fan Speed to 7000 RPM
            LEVEL=0
            update_fan_speed $IDLE
            logger "Adjusted FAN Speed to $IDLE RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "26" ] && [ "$sensor_temp" -le "44" ] && [ "$LEVEL" -ne "1" ]
        then
            # Set Fan Speed to 10000 RPM
            LEVEL=1
            update_fan_speed $LEVEL1
            logger "Adjusted FAN Speed to $LEVEL1 RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "45" ] && [ "$sensor_temp" -le "59" ] && [ "$LEVEL" -ne "2" ]
        then
            # Set Fan Speed to 13000 RPM
            LEVEL=2
            update_fan_speed $LEVEL2
            logger "Adjusted FAN Speed to $LEVEL2 RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "60" ] && [ "$sensor_temp" -le "79" ] && [ "$LEVEL" -ne "3" ]
        then
            # Set Fan Speed to 16000 RPM
            LEVEL=3
            update_fan_speed $LEVEL3
            logger "Adjusted FAN Speed to $LEVEL3 RPM against $sensor_temp Temperature"

        elif [ "$sensor_temp" -ge "80" ] && [ "$LEVEL" -ne "4" ]
        then
            # Set Fan Speed to 19000 RPM
            LEVEL=4
            update_fan_speed $LEVEL4
            logger "Adjusted FAN Speed to $LEVEL4 RPM against $sensor_temp Temperature"
        fi

        # Check for faulty fan
        check_faulty_fan

    done

}

# Check drivers for sysfs attributes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os
import time
import datetime
import struct
import subprocess
from sonic_platform_base.chassis_base import ChassisBase
from sonic_platform.sfp import Sfp
Expand Down Expand Up @@ -41,6 +42,7 @@ class Chassis(ChassisBase):
reset_reason_dict = {}
reset_reason_dict[0xe] = ChassisBase.REBOOT_CAUSE_NON_HARDWARE
reset_reason_dict[0x6] = ChassisBase.REBOOT_CAUSE_NON_HARDWARE
reset_reason_dict[0x7] = ChassisBase.REBOOT_CAUSE_THERMAL_OVERLOAD_OTHER

def __init__(self):
ChassisBase.__init__(self)
Expand Down Expand Up @@ -100,6 +102,36 @@ def _get_cpld_register(self, reg_name):
rv = rv.lstrip(" ")
return rv

def _nvram_write(self, offset, val):
    """
    Writes a single byte to /dev/nvram at the given offset.

    Failures are reported with a printed message and the method returns
    without raising for an open failure, a seek that lands on the wrong
    offset, or a short write.

    Args:
        offset: byte offset inside /dev/nvram to write at
        val: integer in the range 0-255, packed as one unsigned byte

    Returns:
        None
    """
    resource = "/dev/nvram"
    try:
        fd = os.open(resource, os.O_RDWR)
    except OSError:
        # os.open() raises on failure; it never returns a negative fd
        print('File open failed ',resource)
        return
    try:
        if (os.lseek(fd, offset, os.SEEK_SET) != offset):
            print('lseek failed on ',resource)
            return
        ret = os.write(fd, struct.pack('B', val))
        if ret != 1:
            print('Write failed ',str(ret))
            return
    finally:
        # Always release the descriptor, including on the early returns
        os.close(fd)

def _get_thermal_reset(self):
    """
    Tells whether the saved reboot-cause file records a thermal overload.

    Returns:
        True when /host/reboot-cause/reboot-cause.txt is a regular file
        whose content contains "Thermal Overload"; False when the file is
        missing, cannot be read, or does not contain that text.
    """
    reset_file = "/host/reboot-cause/reboot-cause.txt"
    if os.path.isfile(reset_file):
        try:
            with open(reset_file, 'r') as handle:
                contents = handle.read()
        except Exception:
            # An unreadable file is treated the same as a missing one
            return False
        return "Thermal Overload" in contents

    return False

def get_name(self):
"""
Retrieves the name of the chassis
Expand Down Expand Up @@ -181,6 +213,8 @@ def get_reboot_cause(self):
# NVRAM. Only Warmboot and Coldboot reasons are supported here.
# Since it does not support any hardware reason, we return
# non_hardware as default
if self._get_thermal_reset() == True:
self._nvram_write(0x49, 0x7)

lrr = self._get_cpld_register('last_reboot_reason')
if (lrr != 'ERR'):
Expand Down