Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions scripts/fast-reboot
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,42 @@ function request_pre_shutdown()
}
}

function recover_issu_bank_file_instruction()
{
    # Log, via debug, the manual steps for recreating the ISSU bank file.
    # Globals: ISSU_BANK_FILE (read) - path shown in the first message.
    # Every message is passed to debug as a single argument, in order.
    local step
    local -a steps=(
        "To recover (${ISSU_BANK_FILE}) file, do the following:"
        "$ docker exec -it syncd sx_api_dbg_generate_dump.py"
        "$ docker exec -it syncd cat /tmp/sdkdump | grep 'ISSU Bank'"
        "Command above will print the VALUE of ISSU BANK - 0 or 1, use this VALUE in the next command"
        "$ printf VALUE > /host/warmboot/issu_bank.txt"
    )

    for step in "${steps[@]}"; do
        debug "${step}"
    done
}

function check_issu_bank_file()
{
    # Validate the ISSU bank file: it must be exactly one byte, "0" or "1".
    #
    # Globals:
    #   ISSU_BANK_FILE        (written) path of the file being checked; also
    #                         read by recover_issu_bank_file_instruction
    #   MLNX_ISSU_BANK_BROKEN (written) exit code used when the check fails
    # Arguments:
    #   $1 - optional; when it is the literal string "true", a failed check
    #        terminates the script with MLNX_ISSU_BANK_BROKEN. Any other
    #        value (or no argument) only logs the problem.
    # Returns:
    #   0 in every case that does not exit the script.
    ISSU_BANK_FILE=/host/warmboot/issu_bank.txt
    MLNX_ISSU_BANK_BROKEN=102

    local exit_on_failure="${1:-}"
    local problem=""
    local size=""
    local content=""

    if [[ ! -s "${ISSU_BANK_FILE}" ]]; then
        problem="does NOT exist or empty"
    else
        # A failing stat/read leaves the value empty, so the file is reported
        # as broken below instead of aborting the caller under 'set -e'.
        size="$(stat -c %s -- "${ISSU_BANK_FILE}")" || size=""
        content="$(<"${ISSU_BANK_FILE}")" || content=""

        # The size check rejects files such as "0\n": command substitution
        # strips the trailing newline, so content alone would look valid.
        if [[ "${size}" != 1 ]] ||
           [[ "${content}" != "0" && "${content}" != "1" ]]; then
            problem="is broken"
        fi
    fi

    if [[ -z "${problem}" ]]; then
        return 0
    fi

    error "(${ISSU_BANK_FILE}) ${problem} ..."
    recover_issu_bank_file_instruction
    if [[ "${exit_on_failure}" = true ]]; then
        exit "${MLNX_ISSU_BANK_BROKEN}"
    fi
    return 0
}

function wait_for_pre_shutdown_complete_or_fail()
{
debug "Waiting for pre-shutdown ..."
Expand Down Expand Up @@ -483,10 +519,20 @@ systemctl stop swss
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
initialize_pre_shutdown

BEFORE_PRE_SHUTDOWN=true

if [[ "x$sonic_asic_type" == x"mellanox" ]]; then
check_issu_bank_file "$BEFORE_PRE_SHUTDOWN"
fi

request_pre_shutdown

wait_for_pre_shutdown_complete_or_fail

if [[ "x$sonic_asic_type" == x"mellanox" ]]; then
check_issu_bank_file
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the last minute issue.

How can we distinguish whether the error message came from line 531 or line 523? Are you able to pass a differentiating token to check_issu_bank_file and use it to indicate whether the error happened before or after pre-shutdown?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the last minute issue.

How can we distinguish whether the error message came from line 531 or line 523? Are you able to pass a differentiating token to check_issu_bank_file and use it to indicate whether the error happened before or after pre-shutdown?

Fixed

Copy link
Contributor

@yxieca yxieca Jun 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I didn't see this earlier: even before pre-shutdown, swss has already been stopped. Exiting here will eventually leave the device in a failure mode that requires human interaction.

I think both checks should generate a log event stating that a problem was found and marking whether it was found before or after pre-shutdown, but should not exit.

Is there a chance that there was a problem before pre-shutdown, but pre-shutdown fixed it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fi

# Warm reboot: dump state to host disk
if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
sonic-db-cli ASIC_DB FLUSHDB > /dev/null
Expand Down