Skip to content
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 64 additions & 31 deletions files/image_config/process-reboot-cause/process-reboot-cause
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ try:
import pwd
import sys
import syslog
import re
except ImportError as err:
raise ImportError("%s - required module not found" % str(err))

Expand All @@ -22,6 +23,15 @@ REBOOT_CAUSE_DIR = "/host/reboot-cause/"
# File holding the cause recorded for the upcoming/most recent reboot (first line is read at startup)
REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "reboot-cause.txt"
# File to which the determined cause of the previous reboot is written
PREVIOUS_REBOOT_CAUSE_FILE = REBOOT_CAUSE_DIR + "previous-reboot-cause.txt"
# Marker file checked (and removed) when handling a first-time boot
FIRST_BOOT_PLATFORM_FILE = "/tmp/notify_firstboot_to_platform"
# File whose first line is inspected to decide whether the previous reboot was a warm/fast reboot
REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline"
# The following SONIC_BOOT_TYPEs come from the warm/fast reboot script, which lives in sonic-utilities.
# Because the system can be rebooted from older versions, all possible BOOT options have to be considered.
# On 201803 and 201807 we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') fast-reboot"
# On 201811 and later we have
# BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}" where BOOT_TYPE_ARG can be warm, fastfast or fast
# To extract the common part of them, we use the following PATTERN.
# NOTE(review): the keywords are matched anywhere in the line, not only as the value of
# SONIC_BOOT_TYPE, so an unrelated option containing "fast" or "warm" would also match -
# confirm this is acceptable.
REBOOT_TYPE_KEXEC_PATTERN = ".*(fast-reboot|warm|fastfast|fast).*"

# Default cause string, written to REBOOT_CAUSE_FILE for the next reboot
UNKNOWN_REBOOT_CAUSE = "Unknown"

Expand All @@ -47,6 +57,16 @@ def log_error(msg):


# ============================= Functions =============================
def is_warmfast_reboot_from_proc_cmdline():
    """Report whether REBOOT_TYPE_KEXEC_FILE indicates a warm/fast reboot.

    Only the first line of the file is examined; it is tested against
    REBOOT_TYPE_KEXEC_PATTERN.

    Returns:
        True when the first line matches the pattern and the captured
        boot-type keyword is non-empty; False when the file does not
        exist or the line does not match.
    """
    # Nothing to inspect if the file is absent
    if not os.path.isfile(REBOOT_TYPE_KEXEC_FILE):
        return False

    with open(REBOOT_TYPE_KEXEC_FILE, "r") as cmdline_file:
        first_line = cmdline_file.readline()

    matched = re.match(REBOOT_TYPE_KEXEC_PATTERN, first_line)
    # A match with a captured keyword means it was a fast/warm reboot
    return bool(matched and matched.group(1))


def main():
log_info("Starting up...")
Expand All @@ -73,40 +93,55 @@ def main():
try:
import sonic_platform

# Check if the previous reboot was caused by hardware
platform = sonic_platform.platform.Platform()

chassis = platform.get_chassis()

hardware_reboot_cause, optional_details = chassis.get_reboot_cause()
proc_cmdline_reboot_cause = None

if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
# contain any software-related reboot info. We will use it as the previous cause.
# 1. Check if the previous reboot was warm/fast reboot by testing whether there is "fast|fastfast|warm" in /proc/cmdline
# If yes, the content of /host/reboot-cause/reboot-cause.txt will be treated as the reboot cause
if is_warmfast_reboot_from_proc_cmdline():
if os.path.isfile(REBOOT_CAUSE_FILE):
cause_file = open(REBOOT_CAUSE_FILE, "r")
previous_reboot_cause = cause_file.readline().rstrip('\n')
cause_file.close()
# If it is FirstTime Boot and previous_reboot_cause is unknown
# and hardware_reboot cause is non_hardware then
# Update the reboot cause as required
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
proc_cmdline_reboot_cause = cause_file.readline().rstrip('\n')
else:
# /proc/cmdline says it's a warm/fast reboot but /host/reboot-cause.txt doesn't exist.
# report an error.
log_error("/proc/cmdline indicates a fast/warm reboot but {} doesn't exist".format(REBOOT_CAUSE_DIR))
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than only reporting an error,

I think we should still assign

proc_cmdline_reboot_cause = warm or fast

We should treat /proc/cmdline as the source of truth.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


if proc_cmdline_reboot_cause is not None:
previous_reboot_cause = proc_cmdline_reboot_cause
else:
previous_reboot_cause = hardware_reboot_cause
# 2. Check if the previous reboot was caused by hardware
# If yes, the hardware reboot cause will be treated as teh reboot cause
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

teh [](start = 70, length = 3)

-> the

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

platform = sonic_platform.platform.Platform()

chassis = platform.get_chassis()

hardware_reboot_cause, optional_details = chassis.get_reboot_cause()

if hardware_reboot_cause == chassis.REBOOT_CAUSE_NON_HARDWARE:
# The reboot was not caused by hardware. If there is a REBOOT_CAUSE_FILE, it will
# contain any software-related reboot info. We will use it as the previous cause.
if os.path.isfile(REBOOT_CAUSE_FILE):
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
previous_reboot_cause = cause_file.readline().rstrip('\n')
# If it is FirstTime Boot and previous_reboot_cause is unknown
# and hardware_reboot cause is non_hardware then
# Update the reboot cause as required
if os.path.isfile(FIRST_BOOT_PLATFORM_FILE):
if (previous_reboot_cause == UNKNOWN_REBOOT_CAUSE):
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 121 to 132 are the same as lines 140 to 151.

I suggest consolidating them into a single function.

previous_reboot_cause = find_software_reboot_cause()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

elif hardware_reboot_cause == chassis.REBOOT_CAUSE_HARDWARE_OTHER:
previous_reboot_cause = "{} ({})".format(hardware_reboot_cause, optional_details)
else:
previous_reboot_cause = hardware_reboot_cause
except ImportError as err:
log_warning("sonic_platform package not installed. Unable to detect hardware reboot causes.")

# If there is a REBOOT_CAUSE_FILE, it will contain any software-related
# reboot info. We will use it as the previous cause.
if os.path.isfile(REBOOT_CAUSE_FILE):
cause_file = open(REBOOT_CAUSE_FILE, "r")
previous_reboot_cause = cause_file.readline().rstrip('\n')
cause_file.close()
with open(REBOOT_CAUSE_FILE, "r") as cause_file:
previous_reboot_cause = cause_file.readline().rstrip('\n')

# If it is FirstTime Boot and previous_reboot_cause is unknown
# Update the reboot cause as required
Expand All @@ -115,9 +150,8 @@ def main():
previous_reboot_cause = UNKNOWN_REBOOT_CAUSE
os.remove(FIRST_BOOT_PLATFORM_FILE)
# Write the previous reboot cause to PREVIOUS_REBOOT_CAUSE_FILE
prev_cause_file = open(PREVIOUS_REBOOT_CAUSE_FILE, "w")
prev_cause_file.write(previous_reboot_cause)
prev_cause_file.close()
with open(PREVIOUS_REBOOT_CAUSE_FILE, "w") as prev_cause_file:
prev_cause_file.write(previous_reboot_cause)
Copy link
Copy Markdown
Contributor

@yxieca yxieca Dec 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not an issue for this PR or change request for this PR. Just want to start the discussion here.

This change is the result of a team brainstorm over the various combinations of hard/soft reboot reasons. We feel that we've covered all the cases for now. But in case we haven't, we could consider changing the previous reboot cause file to 3 lines:

Reboot cause: (calculation result)
Detected hardware cause: (hardware cause)
Detected software cause: (software cause)

Not sure if any other part of our code has dependency on this file being single line?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I agree with your point.
In addition, if the reboot causes from different sources conflict, it's better to report them in a detailed way.
The drawback is that it can sometimes confuse the user, for example if the following info is displayed:

calculated reboot cause: warm reboot
hardware cause: power off
software cause: warm reboot

Perhaps it's better to use the 3-line format only when we know for sure that the causes from different sources conflict?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You got a good point. However, if we know there is a conflict, then we would have figured it out and fixed it in code. We cannot predict what we don't know yet. :-)


# Also log the previous reboot cause to the syslog
log_info("Previous reboot cause: {}".format(previous_reboot_cause))
Expand All @@ -127,9 +161,8 @@ def main():
os.remove(REBOOT_CAUSE_FILE)

# Write a new default reboot cause file for the next reboot
cause_file = open(REBOOT_CAUSE_FILE, "w")
cause_file.write(UNKNOWN_REBOOT_CAUSE)
cause_file.close()
with open(REBOOT_CAUSE_FILE, "w") as cause_file:
cause_file.write(UNKNOWN_REBOOT_CAUSE)


if __name__ == "__main__":
Expand Down