diff --git a/device/arista/x86_64-arista_7050_qx32s/Arista-7050QX-32S-S4Q31/hwsku.json b/device/arista/x86_64-arista_7050_qx32s/Arista-7050QX-32S-S4Q31/hwsku.json index a407ccd840..0f17443e49 100644 --- a/device/arista/x86_64-arista_7050_qx32s/Arista-7050QX-32S-S4Q31/hwsku.json +++ b/device/arista/x86_64-arista_7050_qx32s/Arista-7050QX-32S-S4Q31/hwsku.json @@ -1,143 +1,131 @@ { "interfaces": { "Ethernet0": { - "default_brkout_mode": "1x10G", - "port_type": "RJ45" - }, - "Ethernet1": { - "default_brkout_mode": "1x10G", - "port_type": "RJ45" - }, - "Ethernet2": { - "default_brkout_mode": "1x10G", - "port_type": "RJ45" - }, - "Ethernet3": { - "default_brkout_mode": "1x1G", + "default_brkout_mode": "3x10G(3)+1x1G(1)", "port_type": "RJ45" }, "Ethernet4": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet8": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet12": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet16": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet20": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet24": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet28": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet32": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet36": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet40": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet44": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet48": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet52": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet56": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet60": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet64": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet68": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet72": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet76": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet80": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet84": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet88": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet92": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet96": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet100": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet104": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet108": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet112": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet116": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet120": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" }, "Ethernet124": { - "default_brkout_mode": "1x40G", + "default_brkout_mode": "1x40G[10G]", "port_type": "QSFP+" } } diff --git a/device/arista/x86_64-arista_7050_qx32s/platform.json b/device/arista/x86_64-arista_7050_qx32s/platform.json index 2bcc31a9a3..4309fe6569 100644 --- a/device/arista/x86_64-arista_7050_qx32s/platform.json +++ b/device/arista/x86_64-arista_7050_qx32s/platform.json @@ -218,6 +218,12 @@ "1x40G[10G]": [ "Ethernet5/1" ], + "3x10G(3)+1x1G(1)": [ + "Ethernet1", + "Ethernet2", + "Ethernet3", + "Ethernet4" + ], "2x20G[10G]": [ "Ethernet5/1", "Ethernet5/3" diff --git a/dockers/docker-lldp/lldpmgrd b/dockers/docker-lldp/lldpmgrd index e786a39ee0..213c11a960 100755 --- a/dockers/docker-lldp/lldpmgrd +++ b/dockers/docker-lldp/lldpmgrd @@ -23,6 +23,8 @@ try: from sonic_py_common import daemon_base from swsscommon import swsscommon from sonic_py_common.interface import inband_prefix, recirc_prefix + from sonic_py_common import device_info + except ImportError as err: raise ImportError("%s - required module not found" % str(err)) @@ -357,7 +359,8 @@ def run_cmd(self, cmd): def check_timeout(self, start_time): if time.time() - start_time > PORT_INIT_TIMEOUT: - self.log_error("Port init timeout reached ({} seconds), resuming lldpd...".format(PORT_INIT_TIMEOUT)) + if device_info.is_frontend_port_present_in_host(): + self.log_error("Port init timeout reached ({} seconds), resuming lldpd...".format(PORT_INIT_TIMEOUT)) return True return False diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 499faff986..ab3a5c58a2 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -938,7 +938,8 @@ for fw_file_name in ${!FW_FILE_MAP[@]}; do # Link old FW location to not break existing automation/scripts sudo ln -s /host/image-$SONIC_IMAGE_VERSION/$PLATFORM_DIR/fw/asic/${FW_FILE_MAP[$fw_file_name]} $FILESYSTEM_ROOT/etc/mlnx/${FW_FILE_MAP[$fw_file_name]} done -sudo cp $files_path/$ISSU_VERSION_FILE $FILESYSTEM_ROOT/etc/mlnx/issu-version +sudo cp $files_path/$ISSU_VERSION_FILE $FILESYSTEM_ROOT/$PLATFORM_DIR/fw/asic/issu-version +sudo ln -s /host/image-$SONIC_IMAGE_VERSION/$PLATFORM_DIR/fw/asic/issu-version $FILESYSTEM_ROOT/etc/mlnx/issu-version sudo cp $files_path/$MLNX_FFB_SCRIPT $FILESYSTEM_ROOT/usr/bin/mlnx-ffb.sh sudo cp $files_path/$MLNX_ONIE_FW_UPDATE $FILESYSTEM_ROOT/usr/bin/$MLNX_ONIE_FW_UPDATE sudo cp $files_path/$MLNX_SSD_FW_UPDATE $FILESYSTEM_ROOT/usr/bin/$MLNX_SSD_FW_UPDATE diff --git a/files/scripts/arp_update b/files/scripts/arp_update index 4b25973cfc..f267e05a54 100755 --- a/files/scripts/arp_update +++ b/files/scripts/arp_update @@ -89,32 +89,47 @@ while /bin/true; do eval `eval $ip6cmd` if [[ $SUBTYPE == "dualtor" ]]; then - # manually set any remaining FAILED/INCOMPLETE entries to permanently INCOMPLETE - # this prevents any remaining INCOMPLETE entries from automatically transitioning to FAILED - # once these entries are incomplete, any subsequent neighbor advertisement messages - # are able to resolve the entry - - # generates the following command for each failed or incomplete IPv6 neighbor - # ip neigh replace dev nud incomplete - neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'" - ip_neigh_replace_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template" - eval `eval $ip_neigh_replace_cmd` - - # on dual ToR devices, try to resolve failed neighbor entries since - # these entries will have tunnel routes installed, preventing normal - # neighbor resolution (SWSS PR #2137) - - # since ndisc6 is a userland process, the above ndisc6 commands are - # insufficient to update the kernel neighbor table for failed entries - - # we don't need to do this for ipv4 neighbors since arping is able to - # update the kernel neighbor table - - # generates the following command for each failed or incomplete IPv6 neighbor + # capture all current failed/incomplete IPv6 neighbors in the kernel to avoid situations where new neighbors are learned + # in the middle of the below sequence of commands + unresolved_kernel_neighbors=$(ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE') + failed_kernel_neighbors=$(echo "$unresolved_kernel_neighbors" | grep FAILED | cut -d ' ' -f 1) + + # it's possible for kernel neighbors to fall out of sync with the hardware + # this can result in failed neighbors entries that don't have corresponding zero MAC neighbor entries + # and therefore don't have tunnel routes installed in the hardware + # flush these neighbors from the kernel to force relearning and resync them to the hardware: + # 1. for every FAILED or INCOMPLETE neighbor in the kernel, check if there is a corresponding zero MAC neighbor in APPL_DB + # 2. if no zero MAC neighbor entry exists, flush the kernel neighbor entry + # - generates the command 'ip neigh flush ' for all such neighbors + unsync_neighbors=$(echo "$unresolved_kernel_neighbors" | cut -d ' ' -f 1 | xargs -I{} bash -c "if [[ -z \"\$(sonic-db-cli APPL_DB hget NEIGH_TABLE:$vlan:{} neigh)\" ]]; then echo '{}'; fi") + if [[ ! -z "$unsync_neighbors" ]]; then + ip_neigh_flush_cmd="echo \"$unsync_neighbors\" | sed -e 's/^/ip neigh flush /' -e 's/$/;/'" + eval `eval "$ip_neigh_flush_cmd"` + sleep 2 + fi + + # generates the following command for each FAILED or INCOMPLETE IPv6 neighbor # timeout 0.2 ping -n -q -i 0 -c 1 -W 1 -I >/dev/null - ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'" - failed_ip6_neigh_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template" - eval `eval $failed_ip6_neigh_cmd` + if [[ ! -z "$unresolved_kernel_neighbors" ]]; then + ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'" + failed_ip6_neigh_cmd="echo \"$unresolved_kernel_neighbors\" | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template" + eval `eval "$failed_ip6_neigh_cmd"` + # allow some time for any transient INCOMPLETE neighbors to transition to FAILED + sleep 5 + fi + + # manually set any remaining FAILED entries to permanently INCOMPLETE + # once these entries are INCOMPLETE, any subsequent neighbor advertisement messages are able to resolve the entry + # ignore INCOMPLETE neighbors since if they are transiently incomplete (i.e. new kernel neighbors that we are attempting to resolve for the first time), + # setting them to permanently incomplete here means the kernel will never generate a netlink message for that neighbor + # generates the following command for each FAILED IPv6 neighbor + # ip neigh replace dev nud incomplete + failed_kernel_neighbors=$(ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED') + if [[ ! -z "$failed_kernel_neighbors" ]]; then + neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'" + ip_neigh_replace_cmd="echo \"$failed_kernel_neighbors\" | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template" + eval `eval "$ip_neigh_replace_cmd"` + fi fi done diff --git a/platform/mellanox/mlnx-ffb.sh b/platform/mellanox/mlnx-ffb.sh index afe42c8198..75faeb514c 100755 --- a/platform/mellanox/mlnx-ffb.sh +++ b/platform/mellanox/mlnx-ffb.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2018-2021 NVIDIA CORPORATION & AFFILIATES. +# Copyright (c) 2018-2023 NVIDIA CORPORATION & AFFILIATES. # Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -49,41 +49,47 @@ check_sdk_upgrade() return "${FFB_SUCCESS}" fi - while :; do - mkdir -p "${FS_MOUNTPOINT}" - mount -t squashfs "${FS_PATH}" "${FS_MOUNTPOINT}" || { - >&2 echo "Failed to mount next SONiC image" + ISSU_VERSION_FILE_PATH="/etc/mlnx/issu-version" + CURRENT_ISSU_VERSION="$(cat ${ISSU_VERSION_FILE_PATH})" + NEXT_ISSU_VERSION="Unknown" + + # /host/image-/platform/fw/asic/issu-version is now the new location for ISSU version. + NEXT_IMAGE_ISSU_VERSION_FILE_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}/platform/fw/asic/issu-version" + + if [ -f "${NEXT_IMAGE_ISSU_VERSION_FILE_PATH}" ]; then + NEXT_ISSU_VERSION="$(cat ${NEXT_IMAGE_ISSU_VERSION_FILE_PATH})" + else + while :; do + mkdir -p "${FS_MOUNTPOINT}" + mount -t squashfs "${FS_PATH}" "${FS_MOUNTPOINT}" || { + >&2 echo "Failed to mount next SONiC image" + break + } + + [ -f "${ISSU_VERSION_FILE_PATH}" ] || { + >&2 echo "No ISSU version file found ${ISSU_VERSION_FILE_PATH}" + break + } + + [ -f "${FS_MOUNTPOINT}/${ISSU_VERSION_FILE_PATH}" ] || { + >&2 echo "No ISSU version file found ${ISSU_VERSION_FILE_PATH} in ${NEXT_SONIC_IMAGE}" + break + } + NEXT_ISSU_VERSION="$(cat ${FS_MOUNTPOINT}/${ISSU_VERSION_FILE_PATH})" break - } + done - ISSU_VERSION_FILE_PATH="/etc/mlnx/issu-version" - - [ -f "${ISSU_VERSION_FILE_PATH}" ] || { - >&2 echo "No ISSU version file found ${ISSU_VERSION_FILE_PATH}" - break - } - - [ -f "${FS_MOUNTPOINT}/${ISSU_VERSION_FILE_PATH}" ] || { - >&2 echo "No ISSU version file found ${ISSU_VERSION_FILE_PATH} in ${NEXT_SONIC_IMAGE}" - break - } - - CURRENT_ISSU_VERSION="$(cat ${ISSU_VERSION_FILE_PATH})" - NEXT_ISSU_VERSION="$(cat ${FS_MOUNTPOINT}/${ISSU_VERSION_FILE_PATH})" - - if [[ "${CURRENT_ISSU_VERSION}" == "${NEXT_ISSU_VERSION}" ]]; then - CHECK_RESULT="${FFB_SUCCESS}" - else - >&2 echo "Current and next ISSU version do not match:" - >&2 echo "Current ISSU version: ${CURRENT_ISSU_VERSION}" - >&2 echo "Next ISSU version: ${NEXT_ISSU_VERSION}" - fi - - break - done + umount -rf "${FS_MOUNTPOINT}" 2> /dev/null || true + rm -rf "${FS_MOUNTPOINT}" 2> /dev/null || true + fi - umount -rf "${FS_MOUNTPOINT}" 2> /dev/null || true - rm -rf "${FS_MOUNTPOINT}" 2> /dev/null || true + if [[ "${CURRENT_ISSU_VERSION}" == "${NEXT_ISSU_VERSION}" ]]; then + CHECK_RESULT="${FFB_SUCCESS}" + else + >&2 echo "Current and next ISSU version do not match:" + >&2 echo "Current ISSU version: ${CURRENT_ISSU_VERSION}" + >&2 echo "Next ISSU version: ${NEXT_ISSU_VERSION}" + fi return "${CHECK_RESULT}" } diff --git a/src/sonic-py-common/sonic_py_common/device_info.py b/src/sonic-py-common/sonic_py_common/device_info.py index e2b5f50028..4198a1394e 100644 --- a/src/sonic-py-common/sonic_py_common/device_info.py +++ b/src/sonic-py-common/sonic_py_common/device_info.py @@ -682,3 +682,13 @@ def is_fast_reboot_enabled(): state_db.close(state_db.STATE_DB) return fb_enable_state + + +def is_frontend_port_present_in_host(): + if is_supervisor(): + return False + if is_multi_npu(): + namespace_id = os.getenv("NAMESPACE_ID") + if not namespace_id: + return False + return True diff --git a/src/sonic-sairedis b/src/sonic-sairedis index ec81223a4b..82a132f921 160000 --- a/src/sonic-sairedis +++ b/src/sonic-sairedis @@ -1 +1 @@ -Subproject commit ec81223a4bad1336ecfebf93ef859d9feafd445f +Subproject commit 82a132f921d006937f37e3993515f774799c1204 diff --git a/src/sonic-swss-common b/src/sonic-swss-common index 9569f3a500..8a53f0490c 160000 --- a/src/sonic-swss-common +++ b/src/sonic-swss-common @@ -1 +1 @@ -Subproject commit 9569f3a5007bcc1fc0c4842fc4fd233e6265b1d0 +Subproject commit 8a53f0490c54bb608f72249c6df995b1d40098bc diff --git a/src/system-health/health_checker/sysmonitor.py b/src/system-health/health_checker/sysmonitor.py index 8979327eaf..5f0456b7c7 100755 --- a/src/system-health/health_checker/sysmonitor.py +++ b/src/system-health/health_checker/sysmonitor.py @@ -11,6 +11,7 @@ from . import utils from sonic_py_common.task_base import ProcessTaskBase from .config import Config +import signal SYSLOG_IDENTIFIER = "system#monitor" REDIS_TIMEOUT_MS = 0 @@ -117,6 +118,8 @@ def __init__(self): self.state_db = None self.config_db = None self.config = Config() + self.mpmgr = multiprocessing.Manager() + self.myQ = self.mpmgr.Queue() #Sets system ready status to state db def post_system_status(self, state): @@ -422,13 +425,11 @@ def system_service(self): self.state_db = swsscommon.SonicV2Connector(use_unix_socket_path=True) self.state_db.connect(self.state_db.STATE_DB) - mpmgr = multiprocessing.Manager() - myQ = mpmgr.Queue() try: - monitor_system_bus = MonitorSystemBusTask(myQ) + monitor_system_bus = MonitorSystemBusTask(self.myQ) monitor_system_bus.task_run() - monitor_statedb_table = MonitorStateDbTask(myQ) + monitor_statedb_table = MonitorStateDbTask(self.myQ) monitor_statedb_table.task_run() except Exception as e: @@ -442,7 +443,7 @@ def system_service(self): # Queue to receive the STATEDB and Systemd state change event while not self.task_stopping_event.is_set(): try: - msg = myQ.get(timeout=QUEUE_TIMEOUT) + msg = self.myQ.get(timeout=QUEUE_TIMEOUT) event = msg["unit"] event_src = msg["evt_src"] event_time = msg["time"] @@ -466,5 +467,24 @@ def task_worker(self): return self.system_service() + def task_stop(self): + # Signal the process to stop + self.task_stopping_event.set() + #Clear the resources of mpmgr- Queue + self.mpmgr.shutdown() + + # Wait for the process to exit + self._task_process.join(self._stop_timeout_secs) + + # If the process didn't exit, attempt to kill it + if self._task_process.is_alive(): + logger.log_notice("Attempting to kill sysmon main process with pid {}".format(self._task_process.pid)) + os.kill(self._task_process.pid, signal.SIGKILL) + + if self._task_process.is_alive(): + logger.log_error("Sysmon main process with pid {} could not be killed".format(self._task_process.pid)) + return False + + return True