diff --git a/files/scripts/swss.sh b/files/scripts/swss.sh index e4af468cec5..4e1dc328726 100755 --- a/files/scripts/swss.sh +++ b/files/scripts/swss.sh @@ -107,6 +107,120 @@ function clean_up_tables() end" 0 } +# This function attempts to delete entries using a specified delete function, retrying up to a maximum number of times if the deletion fails. +# Arguments: +# $1 - delete_func: Name of the function to call for deleting the entry. +# $2 - host: Host identifier for the deletion operation. +# $3 - asic: ASIC identifier for the deletion operation. +# $4 - entry_name: Name of the entry being deleted (used for logging). +# +# Behavior: +# - Calls the delete function with host and asic as arguments. +# - If the deletion succeeds (exit code 0), prints the result and exits. +# - If the deletion fails, logs the attempt and retries up to max_retries times (default: 6). +# - Waits 10 seconds between retries. +# - If all attempts fail, logs an error and returns 1. +function retry_delete_entries() { + local delete_func=$1 + local host=$2 + local asic=$3 + local max_retries=3 + local attempt=1 + local result + while true; do + result=$($delete_func "$host" "$asic") + if [[ $? -eq 0 ]]; then + echo $result + break + else + debug "retrying $delete_func failed host=$host asic=$asic(attempt $attempt/$max_retries)" + if [[ $attempt -ge $max_retries ]]; then + debug "Error: $delete_func failed after $max_retries attempts for host=$host asic=$asic" + return 1 + fi + attempt=$((attempt+1)) + sleep 10 + fi + done +} +# Function to delete SYSTEM_NEIGH entries for a given host and asic +function delete_system_neigh_entries() { + local_host=$1 + local_asic=$2 + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local nn = 0 + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_NEIGH*|' .. host .. '|' .. dev + local keylist = redis.call('KEYS', 'SYSTEM_NEIGH*') + for j,key in ipairs(keylist) do + if string.match(key, ps) ~= nil then + redis.call('DEL', key) + nn = nn + 1 + end + end + return nn" 0 $local_host $local_asic +} +# Function to delete SYSTEM_INTERFACE entries for a given host and asic +function delete_system_interface_entries() { + local_host=$1 + local_asic=$2 + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local nsi = 0 + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_INTERFACE*|' .. host .. '|' .. dev + local keylist = redis.call('KEYS', 'SYSTEM_INTERFACE*') + for j,key in ipairs(keylist) do + if string.match(key, ps) ~= nil then + redis.call('DEL', key) + nsi = nsi + 1 + end + end + return nsi" 0 $local_host $local_asic + } +# Function to delete SYSTEM_LAG_MEMBER_TABLE entries for a given host and asic +function delete_system_lag_member_entries() { + local_host=$1 + local_asic=$2 + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local nlm = 0 + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_LAG_MEMBER_TABLE*|' .. host .. '|' .. dev + local keylist = redis.call('KEYS', 'SYSTEM_LAG_MEMBER_TABLE*') + for j,key in ipairs(keylist) do + if string.match(key, ps) ~= nil then + redis.call('DEL', key) + nlm = nlm + 1 + end + end + return nlm" 0 $local_host $local_asic +} + +function delete_system_lag_entries() { + local_host=$1 + local_asic=$2 + $SONIC_DB_CLI CHASSIS_APP_DB EVAL " + local nsl = 0 + local host = string.gsub(ARGV[1], '%-', '%%-') + local dev = ARGV[2] + local ps = 'SYSTEM_LAG_TABLE*|' .. '(' .. host .. '|' .. dev ..'.*' .. ')' + local keylist = redis.call('KEYS', 'SYSTEM_LAG_TABLE*') + for j,key in ipairs(keylist) do + local lagname = string.match(key, ps) + if lagname ~= nil then + redis.call('DEL', key) + local lagid = redis.call('HGET', 'SYSTEM_LAG_ID_TABLE', lagname) + redis.call('SREM', 'SYSTEM_LAG_ID_SET', lagid) + redis.call('HDEL', 'SYSTEM_LAG_ID_TABLE', lagname) + redis.call('rpush', 'SYSTEM_LAG_IDS_FREE_LIST', lagid) + nsl = nsl + 1 + end + end + return nsl" 0 $local_host $local_asic +} + # This function cleans up the chassis db table entries created ONLY by this asic # This is used to do the clean up operation when the line card / asic reboots # When the asic/lc is RE-booting, the chassis db server is supposed to be running @@ -150,20 +264,8 @@ function clean_up_chassis_db_tables() done debug "Chassis db clean up for ${SERVICE}$DEV. asic=$asic" - # First, delete SYSTEM_NEIGH entries - num_neigh=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL " - local nn = 0 - local host = string.gsub(ARGV[1], '%-', '%%-') - local dev = ARGV[2] - local ps = 'SYSTEM_NEIGH*|' .. host .. '|' .. dev - local keylist = redis.call('KEYS', 'SYSTEM_NEIGH*') - for j,key in ipairs(keylist) do - if string.match(key, ps) ~= nil then - redis.call('DEL', key) - nn = nn + 1 - end - end - return nn" 0 $lc $asic` + # First, delete SYSTEM_NEIGH entries using a dedicated function + num_neigh=$(retry_delete_entries delete_system_neigh_entries "$lc" "$asic") debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_NEIGH entries deleted: $num_neigh" @@ -177,37 +279,16 @@ function clean_up_chassis_db_tables() sleep 30 fi - # Next, delete SYSTEM_INTERFACE entries - num_sys_intf=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL " - local nsi = 0 - local host = string.gsub(ARGV[1], '%-', '%%-') - local dev = ARGV[2] - local ps = 'SYSTEM_INTERFACE*|' .. host .. '|' .. dev - local keylist = redis.call('KEYS', 'SYSTEM_INTERFACE*') - for j,key in ipairs(keylist) do - if string.match(key, ps) ~= nil then - redis.call('DEL', key) - nsi = nsi + 1 - end - end - return nsi" 0 $lc $asic` + # Next, delete SYSTEM_INTERFACE entries + num_sys_intf=$(retry_delete_entries delete_system_interface_entries "$lc" "$asic") debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_INTERFACE entries deleted: $num_sys_intf" + if [[ $num_sys_intf > 0 ]]; then + sleep 15 + fi - # Next, delete SYSTEM_LAG_MEMBER_TABLE entries - num_lag_mem=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL " - local nlm = 0 - local host = string.gsub(ARGV[1], '%-', '%%-') - local dev = ARGV[2] - local ps = 'SYSTEM_LAG_MEMBER_TABLE*|' .. host .. '|' .. dev - local keylist = redis.call('KEYS', 'SYSTEM_LAG_MEMBER_TABLE*') - for j,key in ipairs(keylist) do - if string.match(key, ps) ~= nil then - redis.call('DEL', key) - nlm = nlm + 1 - end - end - return nlm" 0 $lc $asic` + # Next, delete SYSTEM_LAG_MEMBER_TABLE entries using a dedicated function + num_lag_mem=$(retry_delete_entries delete_system_lag_member_entries "$lc" "$asic") debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_LAG_MEMBER_TABLE entries deleted: $num_lag_mem" @@ -220,24 +301,7 @@ function clean_up_chassis_db_tables() fi # Finally, delete SYSTEM_LAG_TABLE entries and deallot LAG IDs - num_sys_lag=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL " - local nsl = 0 - local host = string.gsub(ARGV[1], '%-', '%%-') - local dev = ARGV[2] - local ps = 'SYSTEM_LAG_TABLE*|' .. '(' .. host .. '|' .. dev ..'.*' .. ')' - local keylist = redis.call('KEYS', 'SYSTEM_LAG_TABLE*') - for j,key in ipairs(keylist) do - local lagname = string.match(key, ps) - if lagname ~= nil then - redis.call('DEL', key) - local lagid = redis.call('HGET', 'SYSTEM_LAG_ID_TABLE', lagname) - redis.call('SREM', 'SYSTEM_LAG_ID_SET', lagid) - redis.call('HDEL', 'SYSTEM_LAG_ID_TABLE', lagname) - redis.call('rpush', 'SYSTEM_LAG_IDS_FREE_LIST', lagid) - nsl = nsl + 1 - end - end - return nsl" 0 $lc $asic` + num_sys_lag=$(retry_delete_entries delete_system_lag_entries "$lc" "$asic") debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_LAG_TABLE entries deleted: $num_sys_lag"