# Review notes for this patch. Only these leading comment lines are new; the
# diff text that follows them is unchanged.
#
# What the hunks do, file by file:
#
#   dockers/docker-orchagent/Dockerfile.j2
#     Adds kernel_arp_restore.sh to the list of scripts copied to /usr/bin/.
#
#   files/build_templates/docker_image_ctl.j2 (postStartAction)
#     When /host/config-reload/needs-restore exists: copies fdb.json and
#     arp.json (whichever exist) from /host/config-reload into swss$DEV:/,
#     touches /config-reload-restore inside the container, then removes
#     /host/config-reload. This happens before the existing `touch /ready`.
#     Presumably this is the config-reload counterpart of the fast-reboot
#     branch just above it -- inferred from the path names; TODO confirm.
#
#   dockers/docker-orchagent/swssconfig.sh
#     fast_reboot is replaced by two functions, called in this order:
#       restore_app_db        - loads /fdb.json with swssconfig when the kernel
#                               cmdline contains "fast-reboot" OR the
#                               /config-reload-restore marker exists, then
#                               renames it to /fdb.json.1. /arp.json and
#                               /default_routes.json are loaded only in the
#                               fast-reboot case.
#       signal_kernel_restore - when /config-reload-restore exists: renames
#                               /arp.json to /arp.json.1 and touches
#                               /restore-kernel (only if /arp.json exists),
#                               then removes the marker.
#
#   dockers/docker-orchagent/supervisord.conf
#     New program kernel_arp_restore: runs the script under `timeout 5m`,
#     autostart/autorestart disabled, logs to syslog, started once
#     swssconfig has exited (dependent_startup_wait_for=swssconfig:exited).
#
#   dockers/docker-orchagent/kernel_arp_restore.sh (new)
#     Acts only when /restore-kernel exists. For every entry of /arp.json.1 it
#     finds the key starting with "NEIGH_TABLE", reads that object's "neigh"
#     field as the MAC, splits the key on ":" to get the device (item 1) and
#     the IP (item 2), waits until `ip link show` reports 'state UP' for the
#     device, and runs `ip neigh replace ... nud stale`. Afterwards it removes
#     /restore-kernel.
#     KEY is deliberately read without `jq -r`, so it keeps its JSON quotes:
#     that is what makes `.[$i][$KEY]` a valid jq path and lets
#     `echo $KEY | jq` parse it as a JSON string.
#
# NOTE(review): items to confirm before merging
#   - IP is taken as item 2 of the key split on ":". An address that itself
#     contains ":" (e.g. IPv6) would be cut to its first group. Confirm that
#     arp.json can only hold IPv4 entries, or join items 2.. back together.
#   - KEY is not cleared at the top of each outer iteration; an entry without
#     a NEIGH_TABLE key would silently reuse the previous entry's key.
#   - wait_for_intf_up polls once per second with no limit of its own. The
#     only bound visible here is the `timeout 5m` in supervisord.conf, so one
#     interface that never comes up stops all later entries from being
#     restored and leaves /restore-kernel in place.
#   - The in-script comment "get the first key / jq sorts the keys" no longer
#     matches the code, which scans all keys with startswith("NEIGH_TABLE").
#   - Variables in both functions of kernel_arp_restore.sh are globals, and
#     $INTF_NAME / $ARP_FILE / $DEVICE are expanded unquoted.
#
diff --git a/dockers/docker-orchagent/Dockerfile.j2 b/dockers/docker-orchagent/Dockerfile.j2 index 17c0983c58b..c4938904bec 100755 --- a/dockers/docker-orchagent/Dockerfile.j2 +++ b/dockers/docker-orchagent/Dockerfile.j2 @@ -67,7 +67,7 @@ COPY ["files/arp_update", "/usr/bin"] COPY ["arp_update.conf", "files/arp_update_vars.j2", "/usr/share/sonic/templates/"] COPY ["ndppd.conf", "/usr/share/sonic/templates/"] COPY ["enable_counters.py", "/usr/bin"] -COPY ["docker-init.sh", "orchagent.sh", "swssconfig.sh", "buffermgrd.sh", "/usr/bin/"] +COPY ["docker-init.sh", "orchagent.sh", "swssconfig.sh", "buffermgrd.sh", "kernel_arp_restore.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] COPY ["critical_processes", "/etc/supervisor/"] diff --git a/dockers/docker-orchagent/kernel_arp_restore.sh b/dockers/docker-orchagent/kernel_arp_restore.sh new file mode 100755 index 00000000000..2300c5361b0 --- /dev/null +++ b/dockers/docker-orchagent/kernel_arp_restore.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +function wait_for_intf_up { + INTF_NAME=$1 + + until [[ `ip link show $INTF_NAME | grep 'state UP'` ]]; do + sleep 1; + done +} + +function restore_arp_to_kernel { + ARP_FILE='/arp.json.1' + NUM_ENTRIES=`jq 'length' $ARP_FILE` + + for i in $( seq 0 $(($NUM_ENTRIES - 1)) ); do + + # For the ith object, get the first key + # 'jq' sorts the keys by default so this should always be + # the 'NEIGH_TABLE' key, not the 'OP' key + NUM_KEYS=`jq ".[$i] | keys | length" $ARP_FILE` + for j in $( seq 0 $(($NUM_KEYS - 1)) ); do + if [[ `jq ".[$i] | keys[$j] | startswith(\"NEIGH_TABLE\")" $ARP_FILE` == 'true' ]]; then + KEY=`jq ".[$i] | keys[$j]" $ARP_FILE` + break + fi + done + # For all 'jq' commands below, use '-r' for raw output + # to prevent double quoting + + # For the object associated with the 'NEIGH_TABLE' key + # store the value of the 'neigh' field (the MAC address) + MAC=`jq -r ".[$i][$KEY][\"neigh\"]" 
$ARP_FILE` + + # Split the 'NEIGH_TABLE' key with delimiter ':' and take the + # second item from the result array which is the device name + DEVICE=`echo $KEY | jq -r ". / \":\" | .[1]"` + + # Same as for VLAN, but take the third item which is the IP + IP=`echo $KEY | jq -r ". / \":\" | .[2]"` + + wait_for_intf_up $DEVICE + ip neigh replace "$IP" dev "$DEVICE" lladdr "$MAC" nud stale + done +} + +if [[ -f /restore-kernel ]]; then + restore_arp_to_kernel + rm -f /restore-kernel +fi diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index af68c8c38ab..93b5a271df0 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -71,6 +71,16 @@ stderr_logfile=syslog dependent_startup=true dependent_startup_wait_for=orchagent:running +[program:kernel_arp_restore] +command=/usr/bin/timeout 5m /usr/bin/kernel_arp_restore.sh +priority=6 +autostart=false +autorestart=false +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=swssconfig:exited + [program:restore_neighbors] command=/usr/bin/restore_neighbors.py priority=6 diff --git a/dockers/docker-orchagent/swssconfig.sh b/dockers/docker-orchagent/swssconfig.sh index 0081d074ad6..ee99645b73e 100755 --- a/dockers/docker-orchagent/swssconfig.sh +++ b/dockers/docker-orchagent/swssconfig.sh @@ -2,15 +2,19 @@ set -e -function fast_reboot { - case "$(cat /proc/cmdline)" in - *fast-reboot*) +function restore_app_db { + + if [[ $(cat /proc/cmdline) == *"fast-reboot"* || -f /config-reload-restore ]]; + then if [[ -f /fdb.json ]]; then swssconfig /fdb.json mv -f /fdb.json /fdb.json.1 fi + fi + if [[ $(cat /proc/cmdline) == *"fast-reboot"* ]]; + then if [[ -f /arp.json ]]; then swssconfig /arp.json @@ -22,11 +26,20 @@ function fast_reboot { swssconfig /default_routes.json mv -f /default_routes.json /default_routes.json.1 fi + fi +} - ;; - *) - ;; - esac +function signal_kernel_restore { + if [[ -f 
/config-reload-restore ]]; + then + if [[ -f /arp.json ]]; + then + mv -f /arp.json /arp.json.1 + # Tell kernel_arp_restore.sh that it needs to act + touch /restore-kernel + fi + rm -f /config-reload-restore + fi } # Wait until swss.sh in the host system create file swss:/ready @@ -37,7 +50,8 @@ done rm -f /ready # Restore FDB and ARP table ASAP -fast_reboot +restore_app_db +signal_kernel_restore # read SONiC immutable variables [ -f /etc/sonic/sonic-environment ] && . /etc/sonic/sonic-environment diff --git a/files/build_templates/docker_image_ctl.j2 b/files/build_templates/docker_image_ctl.j2 index 388451fe720..96b0f86f479 100644 --- a/files/build_templates/docker_image_ctl.j2 +++ b/files/build_templates/docker_image_ctl.j2 @@ -173,6 +173,13 @@ function postStartAction() test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss$DEV:/ rm -fr /host/fast-reboot fi + + if [[ -d /host/config-reload ]] && [[ -f /host/config-reload/needs-restore ]]; then + test -e /host/config-reload/fdb.json && docker cp /host/config-reload/fdb.json swss$DEV:/ + test -e /host/config-reload/arp.json && docker cp /host/config-reload/arp.json swss$DEV:/ + docker exec swss$DEV touch /config-reload-restore + rm -fr /host/config-reload + fi docker exec swss$DEV touch /ready # signal swssconfig.sh to go {%- elif docker_container_name == "pmon" %}