diff --git a/tests/pc/test_po_update.py b/tests/pc/test_po_update.py index 53c1ce99aad..99817ba8fa6 100644 --- a/tests/pc/test_po_update.py +++ b/tests/pc/test_po_update.py @@ -11,8 +11,11 @@ from tests.common import config_reload import ipaddress +from tests.common.platform.processes_utils import wait_critical_processes +from tests.common.reboot import wait_for_startup, reboot from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert +from tests.common.helpers.sonic_db import VoqDbCli from tests.common.helpers.voq_helpers import verify_no_routes_from_nexthop pytestmark = [ @@ -374,3 +377,221 @@ def del_add_members(): pytest_assert( has_bgp_neighbors(duthost, pc) and wait_until(120, 10, 0, asichost.check_bgp_statistic, 'ipv4_idle', 0) or wait_until(10, 10, 0, pc_active, asichost, pc)) + + +def increment_lag_id(duthost, upper_lagid_start): + # Retrieve the current free LAG ID from the 'SYSTEM_LAG_IDS_FREE_LIST' in the CHASSIS_APP_DB + current_free_lagid = int(duthost.shell("sonic-db-cli CHASSIS_APP_DB lindex 'SYSTEM_LAG_IDS_FREE_LIST' 0")['stdout']) + # Temporary PortChannel name to be used in the configuration + tmp_pc = "PortChannel999" + # Loop through the range from current_free_lagid to upper_lagid_start (inclusive) + for i in range(current_free_lagid, upper_lagid_start + 1): + # Add the temporary PortChannel to increment the LAG ID + duthost.asics[0].config_portchannel(tmp_pc, "add") + # Remove the temporary PortChannel after incrementing the LAG ID + duthost.asics[0].config_portchannel(tmp_pc, "del") + + # Retrieve the current free LAG ID again after the modifications + current_free_lagid = int( + duthost.shell("sonic-db-cli CHASSIS_APP_DB lindex 'SYSTEM_LAG_IDS_FREE_LIST' 0")['stdout']) + logging.info("SYSTEM_LAG_IDS_FREE_LIST {}".format(current_free_lagid)) + # Assert that the current free LAG ID is greater than or equal to the upper limit (upper_lagid_start) + pytest_assert(current_free_lagid >= upper_lagid_start, + "Increment Lag ID Current:{},> Upper:{}".format(current_free_lagid, upper_lagid_start)) + + +def send_data(dut_mg_facts, duthost, ptfadapter): + # Create a list of tuples for each port channel interface, containing the IP address, peer address, + # port channel name, and its associated namespace. This is filtered for IPv4 addresses only. + peer_ip_pc_pair = [(pc["addr"], pc["peer_addr"], pc["attachto"], + dut_mg_facts["minigraph_portchannels"][pc["attachto"]]['namespace']) + for pc in dut_mg_facts["minigraph_portchannel_interfaces"] + if ipaddress.ip_address(pc['peer_addr']).version == 4] + + # Create a list of tuples where each tuple contains the port channel IP, peer IP, port channel name, + # members of the port channel, and its namespace. + pcs = [(pair[0], pair[1], pair[2], dut_mg_facts["minigraph_portchannels"][pair[2]]["members"], pair[3]) + for pair in peer_ip_pc_pair] + + # Iterate over each port channel pair to send and verify packets between them + for in_pc in pcs: + for out_pc in pcs: + # Skip if the input and output port channels are the same + if in_pc[2] == out_pc[2]: + continue + # Call the function to send and verify the packet between input and output port channels + send_and_verify_packet(in_pc, out_pc, dut_mg_facts, duthost, ptfadapter) + + +def send_and_verify_packet(in_pc, out_pc, dut_mg_facts, duthost, ptfadapter): + # Get the PTF interface index for the first member of the input port channel + in_ptf_index = dut_mg_facts["minigraph_ptf_indices"][in_pc[3][0]] + # Get the PTF interface indices for all members of the output port channel + out_ptf_indices = [dut_mg_facts["minigraph_ptf_indices"][port] for port in out_pc[3]] + + in_peer_ip = in_pc[1] + out_peer_ip = out_pc[1] + # Create a simple IP packet with the source and destination MAC addresses, IP source as input peer IP, + # and IP destination as output peer IP + pkt = testutils.simple_ip_packet( + eth_dst=duthost.asic_instance(duthost.get_asic_id_from_namespace(in_pc[4])).get_router_mac(), + eth_src=ptfadapter.dataplane.get_mac(0, in_ptf_index), + ip_src=in_peer_ip, + ip_dst=out_peer_ip) + + # Make a copy of the packet to define the expected packet + exp_pkt = pkt.copy() + exp_pkt = mask.Mask(exp_pkt) + # Ignore certain fields in the expected packet such as destination MAC, source MAC, IP checksum, and TTL + exp_pkt.set_do_not_care_scapy(packet.Ether, 'dst') + exp_pkt.set_do_not_care_scapy(packet.Ether, 'src') + exp_pkt.set_do_not_care_scapy(packet.IP, 'chksum') + exp_pkt.set_do_not_care_scapy(packet.IP, 'ttl') + + # Flush the dataplane before sending the packet + ptfadapter.dataplane.flush() + # Send the packet through the input port channel + testutils.send(ptfadapter, in_ptf_index, pkt) + # Verify the expected packet is received on any of the output port channel members + testutils.verify_packet_any_port(ptfadapter, exp_pkt, ports=out_ptf_indices) + + +def lag_set_sanity(duthosts): + system_lag_id = {} + # Create a VoqDbCli instance to interact with VOQ DB on the supervisor node + voqdb = VoqDbCli(duthosts.supervisor_nodes[0]) + + # Dump and store the current state of the SYSTEM_LAG_ID_TABLE + system_lag_id["SYSTEM_LAG_ID_TABLE"] = voqdb.dump("SYSTEM_LAG_ID_TABLE")["SYSTEM_LAG_ID_TABLE"]['value'] + # Dump the system LAG ID set, which holds the assigned LAG IDs + SYSTEM_LAG_ID_SET = voqdb.dump("SYSTEM_LAG_ID_SET")["SYSTEM_LAG_ID_SET"]['value'] + # Retrieve the start and end range for system LAG IDs from the database + end = int(voqdb.dump("SYSTEM_LAG_ID_END")["SYSTEM_LAG_ID_END"]['value']) + start = int(voqdb.dump("SYSTEM_LAG_ID_START")["SYSTEM_LAG_ID_START"]['value']) + # Retrieve the list of free LAG IDs from the database + LAG_IDS_FREE_LIST = voqdb.dump("SYSTEM_LAG_IDS_FREE_LIST")["SYSTEM_LAG_IDS_FREE_LIST"]['value'] + + def verify_system_lag_sanity(): + # Combine the free LAG IDs and assigned LAG IDs into a set to check for uniqueness + seen = set(LAG_IDS_FREE_LIST + SYSTEM_LAG_ID_SET) + + # Verify that the number of LAG IDs seen matches the expected range from start to end + if len(seen) != (end - start + 1): + logging.error( + "Missing or extra values are found in SYSTEM_LAG_IDS_FREE_LIST:{} or SYSTEM_LAG_ID_SET:{}".format( + LAG_IDS_FREE_LIST, SYSTEM_LAG_ID_SET)) + return False + + # Check for duplicate values in both the free and assigned LAG ID lists + if any(LAG_IDS_FREE_LIST.count(x) > 1 or SYSTEM_LAG_ID_SET.count( + x) > 1 or x in LAG_IDS_FREE_LIST and x in SYSTEM_LAG_ID_SET for x in seen): + logging.error( + "Duplicate values found in SYSTEM_LAG_IDS_FREE_LIST:{} or SYSTEM_LAG_ID_SET:{}".format( + LAG_IDS_FREE_LIST, SYSTEM_LAG_ID_SET)) + return False + # Log the current system LAG ID set for information purposes + logging.info(SYSTEM_LAG_ID_SET) + return True + + # Assert that the system LAG sanity check passes, using a wait_until function with a timeout + pytest_assert(wait_until(220, 10, 0, verify_system_lag_sanity)) + + +def test_po_update_with_higher_lagids( + duthosts, + enum_rand_one_per_hwsku_frontend_hostname, + tbinfo, + ptfadapter, + reload_testbed_on_failed, localhost): + """ + Test Port Channel Traffic with Higher LAG IDs: + + 1. The test involves rebooting the DUT, + which resets the LAG ID allocation, starting from 1. + 2. After the initial verification of traffic on the port channel (PC) mesh, the + LAG ID allocation is incremented by temporarily adding and deleting port channels. + 3. Verify the LAG set sanity and ensure traffic stability. + 4. Repeat the process for the higher LAG IDs. + """ + duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] + + # Check if the device is a modular chassis and the topology is T2 + is_chassis = duthost.get_facts().get("modular_chassis") + if not (is_chassis and tbinfo['topo']['type'] == 't2' and duthost.facts['switch_type'] == "voq"): + # Skip the test if the setup is not T2 Chassis + pytest.skip("Test is Applicable for T2 VOQ Chassis Setup") + + dut_mg_facts = duthost.get_extended_minigraph_facts(tbinfo) + + # Send initial data to the device + send_data(dut_mg_facts, duthost, ptfadapter) + # Get the supervisor node (suphost) for the modular chassis setup + suphost = duthosts.supervisor_nodes[0] + # Get the established BGP neighbors from the DUT + up_bgp_neighbors = duthost.get_bgp_neighbors_per_asic("established") + + # Log information about cold reboot on the supervisor node + logging.info("Cold reboot on supervisor node: %s", suphost.hostname) + + # Reboot the supervisor node and wait for critical processes to restart + reboot(suphost, localhost, wait=240, safe_reboot=True) + logging.info("Wait until all critical processes are fully started") + wait_critical_processes(suphost) + + # Ensure all critical services have started on the supervisor node + pytest_assert(wait_until(330, 20, 0, suphost.critical_services_fully_started), + "All critical services should fully started! {}".format(suphost.hostname)) + + # For each linecard (frontend node), wait for startup and critical processes to start + for linecard in duthosts.frontend_nodes: + wait_for_startup(linecard, localhost, delay=10, timeout=300) + dut_uptime = linecard.get_up_time() + logging.info('DUT {} up since {}'.format(linecard.hostname, dut_uptime)) + + logging.info("Wait until all critical processes are fully started") + wait_critical_processes(linecard) + + # Ensure all critical services have started on the linecard + pytest_assert(wait_until(330, 20, 0, linecard.critical_services_fully_started), + "All critical services should fully started! {}".format(linecard.hostname)) + + # Perform a sanity check on the LAG set + lag_set_sanity(duthosts) + + # Increment LAG IDs up to 500 + increment_lag_id(duthost, 500) + + # Perform a Config Reload to put the new lag ids on Portchannel + config_reload(duthost, safe_reload=True) + + # Ensure BGP sessions are re-established after reload + pytest_assert(wait_until(300, 10, 0, + duthost.check_bgp_session_state_all_asics, up_bgp_neighbors, "established")) + + # Perform another LAG set sanity check + lag_set_sanity(duthosts) + + # Send data after the configuration reload + send_data(dut_mg_facts, duthost, ptfadapter) + + # Get the unique port channels from the minigraph facts + unique_portchannels = set([entry['attachto'] for entry in dut_mg_facts["minigraph_portchannel_interfaces"]]) + + # Calculate the increment value based on available port channels + inc = 1024 - len(unique_portchannels) + + # Increment LAG IDs based on the calculated increment value + increment_lag_id(duthost, inc) + + # Perform a Config Reload to put the new lag ids on Portchannel + config_reload(duthost, safe_reload=True) + + # Ensure BGP sessions are re-established after the second reload + pytest_assert(wait_until(300, 10, 0, + duthost.check_bgp_session_state_all_asics, up_bgp_neighbors, "established")) + + # Perform a final LAG set sanity check + lag_set_sanity(duthosts) + + # Send data one more time after the final sanity check + send_data(dut_mg_facts, duthost, ptfadapter)