From 4ce8c38ed47e350935a0bcb0df959b1a14df8e7b Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 24 Sep 2019 01:27:48 +0000 Subject: [PATCH 1/4] [warm/fast reboot] some service docker might have been stopped already Some services depend on swss and might already have been stopped by the time the container-killing loop runs. Signed-off-by: Ying Xie --- scripts/fast-reboot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fast-reboot b/scripts/fast-reboot index e11e8667ff..0310e4e2a5 100755 --- a/scripts/fast-reboot +++ b/scripts/fast-reboot @@ -477,7 +477,7 @@ debug "Stopped syncd ..." # then immediately call `systemctl stop ...` to prevent the service from # restarting the container automatically. for CONTAINER_NAME in $(docker ps --format '{{.Names}}'); do - docker kill $CONTAINER_NAME > /dev/null + docker kill $CONTAINER_NAME > /dev/null || [ $? == 1 ] systemctl stop $CONTAINER_NAME done From 6c9baa192b91fe8f84c2932af989f1327c7f4ea0 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 24 Sep 2019 19:00:50 +0000 Subject: [PATCH 2/4] log and ignore all errors while killing containers --- scripts/fast-reboot | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/fast-reboot b/scripts/fast-reboot index 0310e4e2a5..80706a83b8 100755 --- a/scripts/fast-reboot +++ b/scripts/fast-reboot @@ -476,10 +476,16 @@ debug "Stopped syncd ..." # We call `docker kill ...` to ensure the container stops as quickly as possible, # then immediately call `systemctl stop ...` to prevent the service from # restarting the container automatically. +debug "Stopping all remaining containers ..." for CONTAINER_NAME in $(docker ps --format '{{.Names}}'); do - docker kill $CONTAINER_NAME > /dev/null || [ $? == 1 ] + CONTAINER_STOP_RC=0 + docker kill $CONTAINER_NAME &> /dev/null || CONTAINER_STOP_RC=$? 
systemctl stop $CONTAINER_NAME + if [[ CONTAINER_STOP_RC -ne 0 ]]; then + debug "Failed killing container $CONTAINER_NAME RC $CONTAINER_STOP_RC ." + fi done +debug "Stopped all remaining containers ..." # Stop the docker container engine. Otherwise we will have a broken docker storage systemctl stop docker.service From 0f494888a025e7eed9e163c500d1399649295c7f Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 24 Sep 2019 20:32:31 +0000 Subject: [PATCH 3/4] add debug message --- scripts/fast-reboot | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/fast-reboot b/scripts/fast-reboot index 80706a83b8..5b300c9fa4 100755 --- a/scripts/fast-reboot +++ b/scripts/fast-reboot @@ -423,12 +423,14 @@ if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then # then immediately call `systemctl stop teamd` to prevent the service from # restarting the container automatically. # Note: teamd must be killed before syncd, because it will send the last packet through CPU port + debug "Stopping teamd ..." docker exec -i teamd pkill -USR2 teamd || [ $? == 1 ] while docker exec -i teamd pgrep teamd > /dev/null; do sleep 0.05 done docker kill teamd > /dev/null systemctl stop teamd + debug "Stopped teamd ..." fi # Kill swss Docker container From c551c32e1d4e89553b1deac4aa66fa8c80bdf641 Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Tue, 24 Sep 2019 20:40:09 +0000 Subject: [PATCH 4/4] put redirect at the right place --- scripts/fast-reboot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fast-reboot b/scripts/fast-reboot index 5b300c9fa4..e16e11f068 100755 --- a/scripts/fast-reboot +++ b/scripts/fast-reboot @@ -466,7 +466,7 @@ if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; t # Send USR1 signal to all teamd instances to stop them # It will prepare teamd for warm-reboot # Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port - docker exec -i teamd pkill -USR1 teamd || [ $? 
== 1 ] > /dev/null + docker exec -i teamd pkill -USR1 teamd > /dev/null || [ $? == 1 ] debug "Stopped teamd ..." fi