From 34209269d66e17d373c065bfd3f0f90e2a655318 Mon Sep 17 00:00:00 2001
From: sujkang
Date: Fri, 19 Feb 2021 13:57:50 -0800
Subject: [PATCH 1/4] add soft-reboot

---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message):

 * Purpose: adds scripts/soft-reboot, which loads the next SONiC image's
   kernel with "kexec -l", records the reboot cause, and jumps into the new
   kernel with "kexec -e".
 * Line structure of the whole series was restored; it had been collapsed
   onto a handful of lines and could not be parsed as a patch or as bash.
   Indentation was rebuilt with 4 spaces.
 * kdump branch: PLAT_REBOOT was never assigned, so the -x test checked the
   platform directory itself and the following exec would fail before the
   /sbin/reboot fallback. It now falls back to "platform_reboot" when unset.
   NOTE(review): "platform_reboot" is assumed from the usual SONiC platform
   tool name - confirm before merging.
 * reboot_pre_check: "local message=$(...)" followed by "$?" tested the exit
   status of "local" (always 0), so a failed verify-next-image never aborted
   the reboot. Declaration and assignment are now separate.
 * parse_options: getopts string lacked "t", making the "t )" arm
   (TAG_LATEST=no) unreachable.
 * "error" was called but never defined; both call sites now use the
   existing "VERBOSE=yes debug" form already used elsewhere in the script.
 * parse_options is now called with "$@" quoted.
 * All fixes are one-for-one line replacements, so hunk sizes are unchanged.
   The "index" blob ids below are those of the original submission and
   should be regenerated with git format-patch after applying.

 scripts/soft-reboot | 229 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 229 insertions(+)
 create mode 100755 scripts/soft-reboot

diff --git a/scripts/soft-reboot b/scripts/soft-reboot
new file mode 100755
index 0000000000..13526b7ede
--- /dev/null
+++ b/scripts/soft-reboot
@@ -0,0 +1,229 @@
+#!/bin/bash
+DEVPATH="/usr/share/sonic/device"
+REBOOT_CAUSE_FILE="/host/reboot-cause/reboot-cause.txt"
+REBOOT_TIME=$(date)
+REBOOT_METHOD="/sbin/kexec -e"
+LOG_SSD_HEALTH="/usr/local/bin/log_ssd_health"
+
+
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+EXIT_NOT_SUPPORTED=2
+EXIT_FILE_SYSTEM_FULL=3
+EXIT_NEXT_IMAGE_NOT_EXISTS=4
+
+# Reboot immediately if we run the kdump capture kernel
+VMCORE_FILE=/proc/vmcore
+if [ -e $VMCORE_FILE -a -s $VMCORE_FILE ]; then
+    echo "We have a /proc/vmcore, then we just kdump'ed"
+    echo "User issued 'kdump' command [User: kdump, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
+    sync
+    PLATFORM=$(grep -oP 'sonic_platform=\K\S+' /proc/cmdline)
+    if [ ! -z "${PLATFORM}" -a -x ${DEVPATH}/${PLATFORM}/${PLAT_REBOOT:-platform_reboot} ]; then
+        exec ${DEVPATH}/${PLATFORM}/${PLAT_REBOOT:-platform_reboot}
+    fi
+    # If no platform-specific reboot tool, just run /sbin/reboot
+    /sbin/reboot
+    echo 1 > /proc/sys/kernel/sysrq
+    echo b > /proc/sysrq-trigger
+fi
+
+REBOOT_USER=$(logname)
+PLATFORM=$(sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
+ASIC_TYPE=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)
+VERBOSE=no
+EXIT_NEXT_IMAGE_NOT_EXISTS=4
+EXIT_SONIC_INSTALLER_VERIFY_REBOOT=21
+SSD_FW_UPDATE="ssd-fw-upgrade"
+REBOOT_SCRIPT_NAME=$(basename $0)
+REBOOT_TYPE="${REBOOT_SCRIPT_NAME}"
+PLATFORM_PLUGIN="${REBOOT_TYPE}_plugin"
+TAG_LATEST=yes
+
+function debug()
+{
+    if [[ x"${VERBOSE}" == x"yes" ]]; then
+        echo `date` $@
+    fi
+    logger "$@"
+}
+
+function tag_images()
+{
+    if test -f /usr/local/bin/ctrmgr_tools.py
+    then
+        if [[ x"${TAG_LATEST}" == x"yes" ]]; then
+            /usr/local/bin/ctrmgr_tools.py tag-all
+        fi
+    fi
+}
+
+function stop_sonic_services()
+{
+    if [[ x"$ASIC_TYPE" != x"mellanox" ]]; then
+        debug "Stopping syncd process..."
+        docker exec -i syncd /usr/bin/syncd_request_shutdown --cold > /dev/null
+        sleep 3
+    fi
+}
+
+function clear_warm_boot()
+{
+    # If reboot is requested, make sure the outstanding warm-boot is cleared
+    # So the system will come up from a cold boot.
+    WARM_DIR="/host/warmboot"
+    REDIS_FILE=dump.rdb
+    TIMESTAMP=`date +%Y%m%d-%H%M%S`
+    if [[ -f ${WARM_DIR}/${REDIS_FILE} ]]; then
+        mv -f ${WARM_DIR}/${REDIS_FILE} ${WARM_DIR}/${REDIS_FILE}.${TIMESTAMP} || /bin/true
+    fi
+    /sbin/kexec -u || /bin/true
+}
+
+SCRIPT=$0
+
+function show_help_and_exit()
+{
+    echo "Usage ${SCRIPT} [options]"
+    echo " Request rebooting the device. Invoke platform-specific tool when available."
+    echo " This script will shutdown syncd before rebooting."
+    echo " "
+    echo " Available options:"
+    echo " -h, -? : getting this help"
+
+    exit "${EXIT_SUCCESS}"
+}
+
+function setup_reboot_variables()
+{
+    # Kernel and initrd image
+    NEXT_SONIC_IMAGE=$(sonic-installer list | grep "Next: " | cut -d ' ' -f 2)
+    IMAGE_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}"
+    if grep -q aboot_platform= /host/machine.conf; then
+        if is_secureboot; then
+            KERNEL_IMAGE=""
+            BOOT_OPTIONS="SONIC_BOOT_TYPE=${BOOT_TYPE_ARG} secure_boot_enable=1"
+        else
+            KERNEL_IMAGE="$(ls $IMAGE_PATH/boot/vmlinuz-*)"
+            BOOT_OPTIONS="$(cat "$IMAGE_PATH/kernel-cmdline" | tr '\n' ' ') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}"
+        fi
+    elif grep -q onie_platform= /host/machine.conf; then
+        KERNEL_OPTIONS=$(cat /host/grub/grub.cfg | sed "/$NEXT_SONIC_IMAGE'/,/}/"'!'"g" | grep linux)
+        KERNEL_IMAGE="/host$(echo $KERNEL_OPTIONS | cut -d ' ' -f 2)"
+        BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}"
+    else
+        VERBOSE=yes debug "Unknown bootloader. ${REBOOT_TYPE} is not supported."
+        exit "${EXIT_NOT_SUPPORTED}"
+    fi
+    INITRD=$(echo $KERNEL_IMAGE | sed 's/vmlinuz/initrd.img/g')
+}
+
+function load_aboot_secureboot_kernel() {
+    local next_image="$IMAGE_PATH/sonic.swi"
+    echo "Loading next image from $next_image"
+    unzip -qp "$next_image" boot0 | \
+        swipath=$next_image kexec=true loadonly=true ENV_EXTRA_CMDLINE="$BOOT_OPTIONS" bash -
+}
+
+function load_kernel() {
+    # Load kernel into the memory
+    /sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS"
+}
+
+
+function reboot_pre_check()
+{
+    # Make sure that the file system is normal: read-write able
+    filename="/host/test-`date +%Y%m%d-%H%M%S`"
+    ERR=0
+    touch ${filename} || ERR=$?
+    if [[ ${ERR} -ne 0 ]]; then
+        # Continue rebooting in this case, but log the error
+        VERBOSE=yes debug "Filesystem might be read-only or full ..."
+    fi
+    rm ${filename}
+
+    # Verify the next image by sonic-installer
+    local message
+    if ! message=$(sonic-installer verify-next-image 2>&1); then
+        VERBOSE=yes debug "Failed to verify next image: ${message}"
+        exit ${EXIT_SONIC_INSTALLER_VERIFY_REBOOT}
+    fi
+}
+
+function parse_options()
+{
+    while getopts "h?vt" opt; do
+        case ${opt} in
+            h|\? )
+                show_help_and_exit
+                ;;
+            v )
+                VERBOSE=yes
+                ;;
+            t )
+                TAG_LATEST=no
+                ;;
+        esac
+    done
+}
+
+parse_options "$@"
+
+# Exit if not superuser
+if [[ "$EUID" -ne 0 ]]; then
+    echo "This command must be run as root" >&2
+    exit "${EXIT_FAILURE}"
+fi
+
+debug "User requested rebooting device ..."
+
+setup_reboot_variables
+reboot_pre_check
+
+# Tag remotely deployed images as local
+tag_images
+
+# Stop SONiC services gracefully.
+stop_sonic_services
+
+clear_warm_boot
+
+if is_secureboot && grep -q aboot_machine= /host/machine.conf; then
+    load_aboot_secureboot_kernel
+else
+    load_kernel
+fi
+
+if [ -x ${LOG_SSD_HEALTH} ]; then
+    debug "Collecting logs to check ssd health before ${REBOOT_TYPE}..."
+    ${LOG_SSD_HEALTH}
+fi
+
+# Update the reboot cause file to reflect that user issued 'reboot' command
+# Upon next boot, the contents of this file will be used to determine the
+# cause of the previous reboot
+echo "User issued '${REBOOT_SCRIPT_NAME}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
+sync
+/sbin/fstrim -av
+sleep 3
+
+# sync the current system time to CMOS
+if [ -x /sbin/hwclock ]; then
+    /sbin/hwclock -w || /bin/true
+fi
+
+# Run platform specific reboot plugin
+if [ -x ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN} ]; then
+    debug "Running ${PLATFORM} specific plugin..."
+    ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN}
+fi
+
+# Reboot: explicitly call Linux native reboot under sbin
+debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
+exec ${REBOOT_METHOD}
+
+# Should never reach here
+VERBOSE=yes debug "${REBOOT_TYPE} failed!"
+exit "${EXIT_FAILURE}"
+

From 8007a333d122892601026a159de88e637dbdac91 Mon Sep 17 00:00:00 2001
From: sujkang
Date: Fri, 19 Feb 2021 18:39:57 -0800
Subject: [PATCH 2/4] soft-reboot

---
 scripts/soft-reboot | 40 ++++++++++++++--------------------------
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/scripts/soft-reboot b/scripts/soft-reboot
index 13526b7ede..db33374a8a 100755
--- a/scripts/soft-reboot
+++ b/scripts/soft-reboot
@@ -38,6 +38,7 @@ SSD_FW_UPDATE="ssd-fw-upgrade"
 REBOOT_SCRIPT_NAME=$(basename $0)
 REBOOT_TYPE="${REBOOT_SCRIPT_NAME}"
 PLATFORM_PLUGIN="${REBOOT_TYPE}_plugin"
+BOOT_TYPE_ARG="soft"
 TAG_LATEST=yes
 
 function debug()
@@ -100,13 +101,8 @@ function setup_reboot_variables()
     NEXT_SONIC_IMAGE=$(sonic-installer list | grep "Next: " | cut -d ' ' -f 2)
     IMAGE_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}"
     if grep -q aboot_platform= /host/machine.conf; then
-        if is_secureboot; then
-            KERNEL_IMAGE=""
-            BOOT_OPTIONS="SONIC_BOOT_TYPE=${BOOT_TYPE_ARG} secure_boot_enable=1"
-        else
-            KERNEL_IMAGE="$(ls $IMAGE_PATH/boot/vmlinuz-*)"
-            BOOT_OPTIONS="$(cat "$IMAGE_PATH/kernel-cmdline" | tr '\n' ' ') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}"
-        fi
+        KERNEL_IMAGE="$(ls $IMAGE_PATH/boot/vmlinuz-*)"
+        BOOT_OPTIONS="$(cat "$IMAGE_PATH/kernel-cmdline" | tr '\n' ' ') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}"
     elif grep -q onie_platform= /host/machine.conf; then
         KERNEL_OPTIONS=$(cat /host/grub/grub.cfg | sed "/$NEXT_SONIC_IMAGE'/,/}/"'!'"g" | grep linux)
         KERNEL_IMAGE="/host$(echo $KERNEL_OPTIONS | cut -d ' ' -f 2)"
@@ -118,19 +114,11 @@ function setup_reboot_variables()
     INITRD=$(echo $KERNEL_IMAGE | sed 's/vmlinuz/initrd.img/g')
 }
 
-function load_aboot_secureboot_kernel() {
-    local next_image="$IMAGE_PATH/sonic.swi"
-    echo "Loading next image from $next_image"
-    unzip -qp "$next_image" boot0 | \
-        swipath=$next_image kexec=true loadonly=true ENV_EXTRA_CMDLINE="$BOOT_OPTIONS" bash -
-}
-
 function load_kernel() {
     # Load kernel into the memory
     /sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS"
 }
 
-
 function reboot_pre_check()
 {
     # Make sure that the file system is normal: read-write able
@@ -176,6 +164,11 @@ if [[ "$EUID" -ne 0 ]]; then
     exit "${EXIT_FAILURE}"
 fi
 
+if [ -x ${LOG_SSD_HEALTH} ]; then
+    debug "Collecting logs to check ssd health before ${REBOOT_TYPE}..."
+    ${LOG_SSD_HEALTH}
+fi
+
 debug "User requested rebooting device ..."
 
 setup_reboot_variables
@@ -189,24 +182,18 @@ stop_sonic_services
 
 clear_warm_boot
 
-if is_secureboot && grep -q aboot_machine= /host/machine.conf; then
-    load_aboot_secureboot_kernel
-else
-    load_kernel
-fi
+echo "$KERNEL_IMAGE : $INITRD : $BOOT_OPTIONS"
 
-if [ -x ${LOG_SSD_HEALTH} ]; then
-    debug "Collecting logs to check ssd health before ${REBOOT_TYPE}..."
-    ${LOG_SSD_HEALTH}
-fi
+load_kernel
 
 # Update the reboot cause file to reflect that user issued 'reboot' command
 # Upon next boot, the contents of this file will be used to determine the
 # cause of the previous reboot
 echo "User issued '${REBOOT_SCRIPT_NAME}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
+
 sync
-/sbin/fstrim -av
 sleep 3
+sync
 
 # sync the current system time to CMOS
 if [ -x /sbin/hwclock ]; then
@@ -219,8 +206,9 @@ if [ -x ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN} ]; then
     ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN}
 fi
 
-# Reboot: explicitly call Linux native reboot under sbin
+# Reboot: explicitly call Linux "kexec -u"
 debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
+echo `cat ${REBOOT_CAUSE_FILE}`
 exec ${REBOOT_METHOD}
 
 # Should never reach here

From 3947eb15ae938c03391f68409a3877481964f970 Mon Sep 17 00:00:00 2001
From: sujkang
Date: Fri, 19 Feb 2021 18:45:31 -0800
Subject: [PATCH 3/4] message correction

---
 scripts/soft-reboot | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/soft-reboot b/scripts/soft-reboot
index db33374a8a..8383c60a86 100755
--- a/scripts/soft-reboot
+++ b/scripts/soft-reboot
@@ -206,7 +206,7 @@ if [ -x ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN} ]; then
     ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN}
 fi
 
-# Reboot: explicitly call Linux "kexec -u"
+# Reboot: explicitly call Linux "kexec -e"
 debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
 echo `cat ${REBOOT_CAUSE_FILE}`
 exec ${REBOOT_METHOD}

From 270070130872052f239f32148e2584c292dbbd3e Mon Sep 17 00:00:00 2001
From: sujkang
Date: Wed, 24 Feb 2021 16:47:03 -0800
Subject: [PATCH 4/4] review comments

---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message):

 * clear_lingering_reboot_config: the else branch printed "$?" after the
   [[ ... ]] test had already overwritten it, so the logged code was the
   test's status rather than the status of "timeout 10s config warm_restart
   disable". The status is now captured in rc first.
 * Log message "updating ssd fw for${REBOOT_TYPE}" was missing a space.
 * Both fixes are one-for-one line replacements; hunk sizes are unchanged.

 scripts/soft-reboot | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/scripts/soft-reboot b/scripts/soft-reboot
index 8383c60a86..52ccdd690b 100755
--- a/scripts/soft-reboot
+++ b/scripts/soft-reboot
@@ -4,7 +4,7 @@ REBOOT_CAUSE_FILE="/host/reboot-cause/reboot-cause.txt"
 REBOOT_TIME=$(date)
 REBOOT_METHOD="/sbin/kexec -e"
 LOG_SSD_HEALTH="/usr/local/bin/log_ssd_health"
-
+WATCHDOG_UTIL="/usr/local/bin/watchdogutil"
 
 EXIT_SUCCESS=0
 EXIT_FAILURE=1
@@ -68,10 +68,12 @@ function stop_sonic_services()
     fi
 }
 
-function clear_warm_boot()
+function clear_lingering_reboot_config()
 {
-    # If reboot is requested, make sure the outstanding warm-boot is cleared
-    # So the system will come up from a cold boot.
+    # Clear any outstanding warm-reboot config
+    result=$(timeout 10s config warm_restart disable; rc=$?; if [[ ${rc} == 124 ]]; then echo timeout; else echo "code (${rc})"; fi) || /bin/true
+    debug "Cancel warm-reboot: ${result}"
+
     WARM_DIR="/host/warmboot"
     REDIS_FILE=dump.rdb
     TIMESTAMP=`date +%Y%m%d-%H%M%S`
@@ -180,9 +182,7 @@ tag_images
 # Stop SONiC services gracefully.
 stop_sonic_services
 
-clear_warm_boot
-
-echo "$KERNEL_IMAGE : $INITRD : $BOOT_OPTIONS"
+clear_lingering_reboot_config
 
 load_kernel
 
@@ -200,6 +200,17 @@ if [ -x /sbin/hwclock ]; then
     /sbin/hwclock -w || /bin/true
 fi
 
+if [ -x ${DEVPATH}/${PLATFORM}/${SSD_FW_UPDATE} ]; then
+    debug "updating ssd fw for ${REBOOT_TYPE}"
+    ${DEVPATH}/${PLATFORM}/${SSD_FW_UPDATE} ${REBOOT_TYPE}
+fi
+
+# Enable Watchdog Timer
+if [ -x ${WATCHDOG_UTIL} ]; then
+    debug "Enabling Watchdog before ${REBOOT_TYPE}"
+    ${WATCHDOG_UTIL} arm
+fi
+
 # Run platform specific reboot plugin
 if [ -x ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN} ]; then
     debug "Running ${PLATFORM} specific plugin..."
@@ -208,7 +219,6 @@ fi
 
 # Reboot: explicitly call Linux "kexec -e"
 debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
-echo `cat ${REBOOT_CAUSE_FILE}`
 exec ${REBOOT_METHOD}
 
 # Should never reach here