Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 73 additions & 41 deletions files/image_config/process-reboot-cause/process-reboot-cause
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ try:
import pwd
import sys
import syslog
import re
except ImportError as err:
raise ImportError("%s - required module not found" % str(err))

Expand All @@ -22,6 +23,16 @@ REBOOT_CAUSE_DIR = "/host/reboot-cause/"
# File holding the software-recorded cause of the reboot; it is read at startup and then
# rewritten with UNKNOWN_REBOOT_CAUSE for the next boot. REBOOT_CAUSE_DIR is defined earlier in the file.
REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt"
# File to which the reboot cause determined by this script is written.
PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt"
# Marker file that find_software_reboot_cause() deletes when it is present.
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
# Kernel command line of the running system; its first line is matched against the patterns below.
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
# The following SONIC_BOOT_TYPE values come from the warm/fast reboot script, which lives in sonic-utilities.
# Because the system may have been rebooted from an older version, all possible BOOT options have to be considered.
# On 201803, 201807 we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot"
# On 201811 and later we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast
# To extract the common part of them, we use the following patterns.
# The WARM pattern must be tried before the FAST pattern: "SONIC_BOOT_TYPE=fastfast" also
# satisfies the FAST pattern, because "fast" is a prefix of "fastfast".
# NOTE(review): both patterns require the "SONIC_BOOT_TYPE=" prefix, so a bare " fast-reboot"
# token (the 201803/201807 format described above) does not appear to match -- confirm whether this is intended.
REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*"
REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*"

# Placeholder cause used when no software reboot cause has been recorded.
UNKNOWN_REBOOT_CAUSE = "Unknown"

Expand All @@ -47,7 +58,32 @@ def log_error(msg):


# ============================= Functions =============================

def parse_warmfast_reboot_from_proc_cmdline():
    """Detect a warm or fast reboot from the kernel command line.

    Reads the first line of REBOOT_TYPE_KEXEC_FILE and matches it against
    the warm and fast SONIC_BOOT_TYPE patterns, in that order.

    Returns:
        'warm-reboot' if the warm pattern matches, 'fast-reboot' if the fast
        pattern matches, or None when neither matches or the file is missing.
    """
    if not os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
        return None

    with open(REBOOT_TYPE_KEXEC_FILE, "r") as cmdline_file:
        kernel_cmdline = cmdline_file.readline()

    # Order matters: the warm pattern is tried first because a "fastfast"
    # boot type would otherwise be caught by the fast pattern as well.
    ordered_patterns = (
        (REBOOT_TYPE_KEXEC_PATTERN_WARM, 'warm-reboot'),
        (REBOOT_TYPE_KEXEC_PATTERN_FAST, 'fast-reboot'),
    )
    for pattern, reboot_type in ordered_patterns:
        found = re.match(pattern, kernel_cmdline)
        if found and found.group(1):
            return reboot_type

    return None


def find_software_reboot_cause():
    """Return the software-recorded reboot cause.

    The cause is the first line of REBOOT_CAUSE_FILE with its trailing
    newline removed, or UNKNOWN_REBOOT_CAUSE when that file does not exist.

    Side effect: FIRST_BOOT_PLATFORM_FILE is deleted if it is present.
    """
    if os.path.isfile(REBOOT_CAUSE_FILE):
        with open(REBOOT_CAUSE_FILE, "r") as reboot_cause_fh:
            cause = reboot_cause_fh.readline().rstrip('\n')
    else:
        cause = UNKNOWN_REBOOT_CAUSE

    # The first-boot marker is removed whether or not a cause was found.
    if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
        os.remove(FIRST_BOOT_PLATFORM_FILE)

    return cause


def main():
log_info("Starting up...")

Expand All @@ -73,51 +109,48 @@ def main():
try:
import sonic_platform

# Check if the previous reboot was caused by hardware
platform = sonic_platform.platform.Platform()

chassis = platform.get_chassis()

hardware_reboot_cause, optional_details = chassis.get_reboot_cause()

if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
# contain any software-related reboot info. We will use it as the previous cause.
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
# If yes, the content of /host/reboot-cause/reboot-cause.txt will be treated as the reboot cause
proc_cmdline_reboot_cause = parse_warmfast_reboot_from_proc_cmdline()
if proc_cmdline_reboot_cause:
log_info("/proc/cmdline indicates reboot type: {}".format(proc_cmdline_reboot_cause))
if os.path.isfile(REBOOT_CAUSE_FILE):
cause_file = open(REBOOT_CAUSE_FILE, "r")
previous_reboot_cause = cause_file.readline().rstrip('\n')
cause_file.close()
# If it is FirstTime Boot and previous_reboot_cause is unknown
# and hardware_reboot cause is non_hardware then
# Update the reboot cause as required
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n')
else:
# /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt doesn't exist.
# This could happen when upgrading from a version that doesn't support reboot cause.
log_info("Reboot cause file {} doesn't exist".format(REBOOT_CAUSE_DIR))

if proc_cmdline_reboot_cause is not None:
previous_reboot_cause = proc_cmdline_reboot_cause
else:
previous_reboot_cause = hardware_reboot_cause
# 2. Check if the previous reboot was caused by hardware
# If yes, the hardware reboot cause will be treated as the reboot cause
platform = sonic_platform.platform.Platform()

chassis = platform.get_chassis()

hardware_reboot_cause, optional_details = chassis.get_reboot_cause()

if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
# contain any software-related reboot info. We will use it as the previous cause.
previous_reboot_cause = find_software_reboot_cause()
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
else:
previous_reboot_cause = hardware_reboot_cause
except ImportError as err:
log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")

# If there is a REBOOT_CAUSE_FILE, it will contain any software-related
# reboot info. We will use it as the previous cause.
if os.path.isfile(REBOOT_CAUSE_FILE):
cause_file = open(REBOOT_CAUSE_FILE, "r")
previous_reboot_cause = cause_file.readline().rstrip('\n')
cause_file.close()

# If it is FirstTime Boot and previous_reboot_cause is unknown
# Update the reboot cause as required
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
previous_reboot_cause = find_software_reboot_cause()

# Write the previous reboot cause to PREVIOUS_REBOOT_CAUSE_FILE
prev_cause_file = open(PREVIOUS_REBOOT_CAUSE_FILE, "w")
prev_cause_file.write(previous_reboot_cause)
prev_cause_file.close()
with open(PREVIOUS_REBOOT_CAUSE_FILE, "w") as prev_cause_file:
prev_cause_file.write(previous_reboot_cause)
Copy link
Contributor

@yxieca yxieca Dec 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not an issue for this PR or change request for this PR. Just want to start the discussion here.

This change is the result of a team brainstorm on the various combinations of hard/soft reboot reasons. We feel that we've covered all the cases for now. But in case we haven't, we could consider changing the previous reboot cause file to 3 lines:

Reboot cause: (calculation result)
Detected hardware cause: (hardware cause)
Detected software cause: (software cause)

Not sure if any other part of our code has dependency on this file being single line?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I agree with your point.
In addition, if the reboot causes from different sources conflict, it's better to present them in detail.
Meanwhile, the drawback is that it can sometimes confuse the user. For example, if the following info is displayed:

calculated reboot cause: warm reboot
hardware cause: power off
software cause: warm reboot

Perhaps it's better to use the 3-line format only when we know for sure that the causes from different sources conflict?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You got a good point. However, if we know there is a conflict, then we would have figured it out and fixed it in code. We cannot predict what we don't know yet. :-)


# Also log the previous reboot cause to the syslog
log_info("Previous reboot cause: {}".format(previous_reboot_cause))
Expand All @@ -127,9 +160,8 @@ def main():
os.remove(REBOOT_CAUSE_FILE)

# Write a new default reboot cause file for the next reboot
cause_file = open(REBOOT_CAUSE_FILE, "w")
cause_file.write(UNKNOWN_REBOOT_CAUSE)
cause_file.close()
with open(REBOOT_CAUSE_FILE, "w") as cause_file:
cause_file.write(UNKNOWN_REBOOT_CAUSE)


if __name__ == "__main__":
Expand Down