diff --git a/files/scripts/syncd.sh b/files/scripts/syncd.sh
index 08c50a2c37c..f694b694c75 100755
--- a/files/scripts/syncd.sh
+++ b/files/scripts/syncd.sh
@@ -99,6 +99,10 @@ stop() {
         debug "Warm shutdown syncd process ..."
         /usr/bin/docker exec -i syncd /usr/bin/syncd_request_shutdown --warm
 
+        if [ x"$sonic_asic_platform" == x'mellanox' ]; then
+            /usr/bin/docker exec -i syncd /usr/bin/checkpoint.sh
+        fi
+
         # wait until syncd quits gracefully
         while docker top syncd | grep -q /usr/bin/syncd; do
             sleep 0.1
diff --git a/platform/mellanox/docker-syncd-mlnx.mk b/platform/mellanox/docker-syncd-mlnx.mk
index 5ad6f6dccf1..b9f69adebf5 100644
--- a/platform/mellanox/docker-syncd-mlnx.mk
+++ b/platform/mellanox/docker-syncd-mlnx.mk
@@ -13,4 +13,5 @@ $(DOCKER_SYNCD_MLNX)_CONTAINER_NAME = syncd
 $(DOCKER_SYNCD_MLNX)_RUN_OPT += --net=host --privileged -t
 $(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /host/machine.conf:/etc/machine.conf
 $(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
+$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /host/warmboot:/var/warmboot
 $(DOCKER_SYNCD_MLNX)_RUN_OPT += --tmpfs /run/criu
diff --git a/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2 b/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2
index bf301f179a4..51577a592c4 100755
--- a/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2
+++ b/platform/mellanox/docker-syncd-mlnx/Dockerfile.j2
@@ -24,6 +24,9 @@ debs/{{ deb }}{{' '}}
 COPY ["start.sh", "/usr/bin/"]
 COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
 
+COPY ["restore.sh", "/usr/bin/"]
+COPY ["checkpoint.sh", "/usr/bin/"]
+
 ## Clean up
 RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y
 RUN rm -rf /debs
diff --git a/platform/mellanox/docker-syncd-mlnx/checkpoint.sh b/platform/mellanox/docker-syncd-mlnx/checkpoint.sh
new file mode 100755
index 00000000000..0299c544bb4
--- /dev/null
+++ b/platform/mellanox/docker-syncd-mlnx/checkpoint.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+#
+# Checkpoint syncd and the SDK process (sx_sdk) with CRIU.
+#
+# Each process is dumped into its own directory under /var/warmboot and the
+# contents of /dev/shm are copied next to the dumps.
+# Exits non-zero when a process is not running or any step fails.
+
+SYNCD_DUMP_DIR=/var/warmboot/syncd
+SDK_DUMP_DIR=/var/warmboot/sdk
+SHM_DUMP_DIR=/var/warmboot/shm
+SHM_DIR=/dev/shm
+
+# Report a failure on stderr without stopping the script.
+warn() {
+    echo "checkpoint.sh: $*" >&2
+}
+
+# Print the criu arguments, one word per line, that declare every socket
+# held open by process $1 as external: "--external" then "unix[INODE]".
+external_socket_args() {
+    local fd target
+    for fd in /proc/"$1"/fd/*; do
+        target=$(readlink "$fd") || continue
+        if [[ $target == socket:\[*\] ]]; then
+            printf '%s\n' --external "unix${target#socket:}"
+        fi
+    done
+}
+
+# Drop dumps left by an earlier run before anything can fail, so that a
+# failed checkpoint never leaves a stale dump behind.
+rm -rf -- "$SYNCD_DUMP_DIR" "$SDK_DUMP_DIR" "$SHM_DUMP_DIR"
+
+# pidof -s yields a single PID; an empty or multi-PID result would corrupt
+# the /proc lookups and the criu command lines below.
+if ! SYNCD_PID=$(pidof -s syncd); then
+    warn "syncd is not running"
+    exit 1
+fi
+if ! SDK_PID=$(pidof -s sx_sdk); then
+    warn "sx_sdk is not running"
+    exit 1
+fi
+
+# Collect the sockets of both processes before either one is dumped. Arrays
+# keep each "unix[INODE]" one literal word; expanded unquoted from a string,
+# the brackets would be open to pathname expansion.
+mapfile -t SYNCD_EXTERNAL_ARGS < <(external_socket_args "$SYNCD_PID")
+mapfile -t SDK_EXTERNAL_ARGS < <(external_socket_args "$SDK_PID")
+
+if ! mkdir -p -- "$SYNCD_DUMP_DIR" "$SDK_DUMP_DIR" "$SHM_DUMP_DIR"; then
+    warn "cannot create the dump directories"
+    exit 1
+fi
+
+rc=0
+
+if ! criu dump -D "$SYNCD_DUMP_DIR" -t "$SYNCD_PID" --shell-job \
+        "${SYNCD_EXTERNAL_ARGS[@]}"; then
+    warn "criu dump of syncd (pid $SYNCD_PID) failed"
+    rc=1
+fi
+
+if ! criu dump -D "$SDK_DUMP_DIR" -t "$SDK_PID" --shell-job \
+        "${SDK_EXTERNAL_ARGS[@]}"; then
+    warn "criu dump of sx_sdk (pid $SDK_PID) failed"
+    rc=1
+fi
+
+# "/." copies the directory contents, hidden files included, and does not
+# fail on an empty directory the way a "/*" glob does.
+if ! cp -rf -- "$SHM_DIR"/. "$SHM_DUMP_DIR"/; then
+    warn "cannot save $SHM_DIR"
+    rc=1
+fi
+
+exit "$rc"
diff --git a/platform/mellanox/docker-syncd-mlnx/restore.sh b/platform/mellanox/docker-syncd-mlnx/restore.sh
new file mode 100755
index 00000000000..d0be17841a7
--- /dev/null
+++ b/platform/mellanox/docker-syncd-mlnx/restore.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Restore /dev/shm, the SDK process and syncd from the dump directories
+# under /var/warmboot. A directory that does not exist is skipped; one that
+# does exist is deleted after use, whether or not the step succeeded.
+
+SYNCD_DUMP_DIR=/var/warmboot/syncd
+SDK_DUMP_DIR=/var/warmboot/sdk
+SHM_DUMP_DIR=/var/warmboot/shm
+SHM_DIR=/dev/shm
+
+# Run criu restore on the dump in directory $1, then delete the dump.
+# $2 names the process in the failure message.
+restore_dump() {
+    [ -d "$1" ] || return 0
+    if ! criu restore -d -D "$1" --shell-job --ext-unix-sk; then
+        echo "restore.sh: criu restore of $2 failed" >&2
+    fi
+    rm -rf -- "$1"
+}
+
+# Shared memory goes back first, then the SDK, then syncd.
+if [ -d "$SHM_DUMP_DIR" ]; then
+    # "/." copies the directory contents and copes with an empty directory.
+    if ! cp -rf -- "$SHM_DUMP_DIR"/. "$SHM_DIR"/; then
+        echo "restore.sh: cannot restore $SHM_DIR" >&2
+    fi
+    rm -rf -- "$SHM_DUMP_DIR"
+fi
+
+restore_dump "$SDK_DUMP_DIR" sx_sdk
+restore_dump "$SYNCD_DUMP_DIR" syncd
diff --git a/platform/mellanox/docker-syncd-mlnx/start.sh b/platform/mellanox/docker-syncd-mlnx/start.sh
index 3adbd04d536..e0156e57870 100755
--- a/platform/mellanox/docker-syncd-mlnx/start.sh
+++ b/platform/mellanox/docker-syncd-mlnx/start.sh
@@ -1,5 +1,11 @@
 #!/usr/bin/env bash
 
+# NOTE(review): presumably this keeps the low PIDs free for the processes
+# that criu restores from the dump -- confirm.
+if [ -d /var/warmboot/syncd ]; then
+    echo 1000 > /proc/sys/kernel/ns_last_pid
+fi
+
 rm -f /var/run/rsyslogd.pid
 
 supervisorctl start rsyslogd
diff --git a/platform/mellanox/rules.mk b/platform/mellanox/rules.mk
index ff5c96fe004..cc465d41a93 100644
--- a/platform/mellanox/rules.mk
+++ b/platform/mellanox/rules.mk
@@ -11,6 +11,7 @@ include $(PLATFORM_PATH)/libsaithrift-dev.mk
 include $(PLATFORM_PATH)/python-saithrift.mk
 include $(PLATFORM_PATH)/docker-ptf-mlnx.mk
 include $(PLATFORM_PATH)/mlnx-sfpd.mk
+include $(PLATFORM_PATH)/criu.mk
 
 SONIC_ALL += $(SONIC_ONE_IMAGE) \
              $(DOCKER_FPM)